Re: [PATCH] /proc/kcore: Update physical address for kcore ram and text

2017-01-24 Thread Pratyush Anand

Hi Dave,

On Wednesday 25 January 2017 11:59 AM, Dave Young wrote:

Hi Pratyush
On 01/25/17 at 10:14am, Pratyush Anand wrote:

Currently all the p_paddr of PT_LOAD headers are assigned to 0, which is
not true and could be misleading, since 0 is a valid physical address.

I do not know the history of /proc/kcore, so my question is why p_paddr
was set to 0: was there some reason for it, and could changing it cause
some risk or breakage?



I do not know why it was set to 0, which is a valid physical address. But
this change certainly might break some user space tools, and those will
need to be fixed. For example, see the following code from kexec-tools:


kexec/kexec-elf.c:build_mem_phdrs()

435 if ((phdr->p_paddr + phdr->p_memsz) < phdr->p_paddr) {
436 /* The memory address wraps */
437 if (probe_debug) {
438 fprintf(stderr, "ELF address wrap 
around\n");

439 }
440 return -1;
441 }

We do not need to perform the above check when the physical address is invalid (-1).

I think kexec-tools and makedumpfile will need fixups. I already have
those fixups, which will be sent upstream once this patch makes it through.
An advantage of this approach is that it will help to calculate variables
like page_offset and phys_base from PT_LOAD even when they are randomized,
and it will therefore reduce the number of variables and version-specific
values needed in user space tools.


~Pratyush

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v30 04/11] arm64: mm: allow for unmapping memory region from kernel mapping

2017-01-24 Thread AKASHI Takahiro
On Tue, Jan 24, 2017 at 05:02:20PM +0530, Pratyush Anand wrote:
> 
> 
> On Tuesday 24 January 2017 02:19 PM, AKASHI Takahiro wrote:
> >The current implementation of create_mapping_late() is only allowed
> >to modify permission attributes (read-only or read-write) against
> >the existing kernel mapping.
> >
> >In this patch, PAGE_KERNEL_INVALID protection attribute is introduced.
> >We will now be able to invalidate (or unmap) some part of the existing
> >kernel mapping by specifying PAGE_KERNEL_INVALID to create_mapping_late().
> >
> >This feature will be used in a succeeding kdump patch to protect
> >the memory reserved for the crash dump kernel once it has been loaded.
> >
> >Signed-off-by: AKASHI Takahiro 
> >---
> > arch/arm64/include/asm/mmu.h   |  2 ++
> > arch/arm64/include/asm/pgtable-hwdef.h |  2 ++
> > arch/arm64/include/asm/pgtable-prot.h  |  1 +
> > arch/arm64/include/asm/pgtable.h   |  4 
> > arch/arm64/mm/mmu.c| 29 -
> > 5 files changed, 29 insertions(+), 9 deletions(-)
> >
> >diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> >index 47619411f0ff..a6c1367527bc 100644
> >--- a/arch/arm64/include/asm/mmu.h
> >+++ b/arch/arm64/include/asm/mmu.h
> >@@ -36,6 +36,8 @@ extern void init_mem_pgprot(void);
> > extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> >unsigned long virt, phys_addr_t size,
> >pgprot_t prot, bool page_mappings_only);
> >+extern void create_mapping_late(phys_addr_t phys, unsigned long virt,
> >+phys_addr_t size, pgprot_t prot);
> > extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
> >
> > #endif
> >diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
> >b/arch/arm64/include/asm/pgtable-hwdef.h
> >index eb0c2bd90de9..e66efec31ca9 100644
> >--- a/arch/arm64/include/asm/pgtable-hwdef.h
> >+++ b/arch/arm64/include/asm/pgtable-hwdef.h
> >@@ -119,6 +119,7 @@
> > #define PUD_TABLE_BIT   (_AT(pgdval_t, 1) << 1)
> > #define PUD_TYPE_MASK   (_AT(pgdval_t, 3) << 0)
> > #define PUD_TYPE_SECT   (_AT(pgdval_t, 1) << 0)
> >+#define PUD_VALID   PUD_TYPE_SECT
> >
> > /*
> >  * Level 2 descriptor (PMD).
> >@@ -128,6 +129,7 @@
> > #define PMD_TYPE_TABLE  (_AT(pmdval_t, 3) << 0)
> > #define PMD_TYPE_SECT   (_AT(pmdval_t, 1) << 0)
> > #define PMD_TABLE_BIT   (_AT(pmdval_t, 1) << 1)
> >+#define PMD_VALID   PMD_TYPE_SECT
> >
> > /*
> >  * Section
> >diff --git a/arch/arm64/include/asm/pgtable-prot.h 
> >b/arch/arm64/include/asm/pgtable-prot.h
> >index 2142c7726e76..945d84cd5df7 100644
> >--- a/arch/arm64/include/asm/pgtable-prot.h
> >+++ b/arch/arm64/include/asm/pgtable-prot.h
> >@@ -54,6 +54,7 @@
> > #define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | 
> > PTE_DIRTY | PTE_RDONLY)
> > #define PAGE_KERNEL_EXEC__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | 
> > PTE_WRITE)
> > #define PAGE_KERNEL_EXEC_CONT   __pgprot(_PAGE_DEFAULT | PTE_UXN | 
> > PTE_DIRTY | PTE_WRITE | PTE_CONT)
> >+#define PAGE_KERNEL_INVALID __pgprot(0)
> >
> > #define PAGE_HYP__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
> > #define PAGE_HYP_EXEC   __pgprot(_PAGE_DEFAULT | PTE_HYP | 
> > PTE_RDONLY)
> >diff --git a/arch/arm64/include/asm/pgtable.h 
> >b/arch/arm64/include/asm/pgtable.h
> >index ffbb9a520563..1904a7c07018 100644
> >--- a/arch/arm64/include/asm/pgtable.h
> >+++ b/arch/arm64/include/asm/pgtable.h
> >@@ -364,6 +364,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, 
> >unsigned long pfn,
> >
> > #define pmd_bad(pmd)(!(pmd_val(pmd) & PMD_TABLE_BIT))
> >
> >+#define pmd_valid(pmd)  (!!(pmd_val(pmd) & PMD_VALID))
> >+
> > #define pmd_table(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == \
> >  PMD_TYPE_TABLE)
> > #define pmd_sect(pmd)   ((pmd_val(pmd) & PMD_TYPE_MASK) == \
> >@@ -428,6 +430,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
> >
> > #define pud_none(pud)   (!pud_val(pud))
> > #define pud_bad(pud)(!(pud_val(pud) & PUD_TABLE_BIT))
> >+#define pud_valid(pud)  (!!(pud_val(pud) & PUD_VALID))
> 
> This will break compilation for CONFIG_PGTABLE_LEVELS <= 2

Ah, yes. A quick fix is as follows:

===8<===
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1904a7c07018..dc11d4bf332c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -467,6 +467,8 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 #else
 
+#define pud_valid(pud) (1)
+
 #define pud_page_paddr(pud)({ BUILD_BUG(); 0; })
 
 /* Match pmd_offset folding in  */
@@ -520,6 +522,8 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 
 #else
 
+#define pgd_valid(pgd) (1)
+
 #define 

Re: [PATCH] /proc/kcore: Update physical address for kcore ram and text

2017-01-24 Thread Dave Young
Hi Pratyush
On 01/25/17 at 10:14am, Pratyush Anand wrote:
> Currently all the p_paddr of PT_LOAD headers are assigned to 0, which is
> not true and could be misleading, since 0 is a valid physical address.

I do not know the history of /proc/kcore, so my question is why p_paddr
was set to 0: was there some reason for it, and could changing it cause
some risk or breakage?

> 
> User space tools like makedumpfile need to know the physical address for
> PT_LOAD segments of direct mapped regions. Therefore this patch updates
> paddr for such regions. It also sets an invalid paddr (-1) for other
> regions, so that user space tools can know whether a physical address
> provided in PT_LOAD is correct or not.
> 
> Signed-off-by: Pratyush Anand 
> ---
>  fs/proc/kcore.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 0b80ad87b4d6..ea9f3d1ae830 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, 
> int dataoff)
>   phdr->p_flags   = PF_R|PF_W|PF_X;
>   phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
>   phdr->p_vaddr   = (size_t)m->addr;
> - phdr->p_paddr   = 0;
> + if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
> + phdr->p_paddr   = __pa(m->addr);
> + else
> + phdr->p_paddr   = (elf_addr_t)-1;
>   phdr->p_filesz  = phdr->p_memsz = m->size;
>   phdr->p_align   = PAGE_SIZE;
>   }
> -- 
> 2.9.3
> 

Thanks
Dave

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH] /proc/kcore: Update physical address for kcore ram and text

2017-01-24 Thread Pratyush Anand
Currently all the p_paddr of PT_LOAD headers are assigned to 0, which is
not true and could be misleading, since 0 is a valid physical address.

User space tools like makedumpfile need to know the physical address for
PT_LOAD segments of direct mapped regions. Therefore this patch updates
paddr for such regions. It also sets an invalid paddr (-1) for other
regions, so that user space tools can know whether a physical address
provided in PT_LOAD is correct or not.

Signed-off-by: Pratyush Anand 
---
 fs/proc/kcore.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int 
dataoff)
phdr->p_flags   = PF_R|PF_W|PF_X;
phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr   = (size_t)m->addr;
-   phdr->p_paddr   = 0;
+   if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+   phdr->p_paddr   = __pa(m->addr);
+   else
+   phdr->p_paddr   = (elf_addr_t)-1;
phdr->p_filesz  = phdr->p_memsz = m->size;
phdr->p_align   = PAGE_SIZE;
}
-- 
2.9.3


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3] kexec: implemented XEN KEXEC STATUS to determine if an image is loaded

2017-01-24 Thread Eric DeVolder

On 01/24/2017 01:16 PM, Daniel Kiper wrote:

On Tue, Jan 24, 2017 at 12:55:35PM -0600, Eric DeVolder wrote:

Instead of the scripts having to poke at various fields we can
provide that functionality via the -S parameter.

kexec_loaded/kexec_crash_loaded exposes Linux kernel kexec/crash
state. It does not say anything about Xen kexec/crash state. So,
we need a special approach to get the latter. Though for
compatibility we provide similar functionality in kexec-tools
for the former.

This change enables the --status or -S option to work either
with or without Xen.

Returns 0 if the payload is loaded. Can be used in combination
with -l or -p to get the state of the proper kexec image.

Signed-off-by: Konrad Rzeszutek Wilk 
Signed-off-by: Eric DeVolder 
---
Note: The corresponding Xen changes have been committed
to the Xen staging branch. Follow this thread:
https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01570.html

CC: Andrew Cooper 
CC: kexec@lists.infradead.org
CC: xen-de...@lists.xenproject.org
CC: Daniel Kiper 

v0: First version (internal product).
v1: Posted on kexec mailing list. Changed -s to -S
v2: Incorporated feedback from kexec mailing list, posted on kexec mailing list
v3: Incorporated feedback from kexec mailing list
---
 configure.ac  |  8 ++-
 kexec/kexec-xen.c | 26 +++
 kexec/kexec.8 |  6 ++
 kexec/kexec.c | 62 ---
 kexec/kexec.h |  5 -
 5 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3044185..c6e864b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -165,8 +165,14 @@ fi
 dnl find Xen control stack libraries
 if test "$with_xen" = yes ; then
AC_CHECK_HEADER(xenctrl.h,
-   [AC_CHECK_LIB(xenctrl, xc_kexec_load, ,
+   [AC_CHECK_LIB(xenctrl, xc_kexec_load, [ have_xenctrl_h=yes ],
AC_MSG_NOTICE([Xen support disabled]))])
+if test "$have_xenctrl_h" = yes ; then
+   AC_CHECK_LIB(xenctrl, xc_kexec_status,
+   AC_DEFINE(HAVE_KEXEC_CMD_STATUS, 1,
+   [The kexec_status call is available]),
+   AC_MSG_NOTICE([The kexec_status call is not available]))
+fi


I have a feeling that you have missed my comment. Please add two TABs
starting from "+if test "$have_xenctrl_h" = yes ; then" up to "+fi".
So, it should look more or less like this:

AC_MSG_NOTICE([Xen support disabled]))])
+   if test "$have_xenctrl_h" = yes ; then
+   AC_CHECK_LIB(xenctrl, xc_kexec_status,
...

If it is not needed or something like that please drop me a line.


 fi

 dnl ---Sanity checks
diff --git a/kexec/kexec-xen.c b/kexec/kexec-xen.c
index 24a4191..2b448d3 100644
--- a/kexec/kexec-xen.c
+++ b/kexec/kexec-xen.c
@@ -105,6 +105,27 @@ int xen_kexec_unload(uint64_t kexec_flags)
return ret;
 }

+int xen_kexec_status(uint64_t kexec_flags)
+{
+   xc_interface *xch;
+   uint8_t type;
+   int ret = -1;
+
+#ifdef HAVE_KEXEC_CMD_STATUS
+   xch = xc_interface_open(NULL, NULL, 0);
+   if (!xch)
+   return -1;
+
+   type = (kexec_flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH : 
KEXEC_TYPE_DEFAULT;
+
+   ret = xc_kexec_status(xch, type);
+
+   xc_interface_close(xch);
+#endif
+
+   return ret;
+}
+
 void xen_kexec_exec(void)
 {
xc_interface *xch;
@@ -130,6 +151,11 @@ int xen_kexec_unload(uint64_t kexec_flags)
return -1;
 }

+int xen_kexec_status(uint64_t kexec_flags)
+{
+   return -1;
+}
+
 void xen_kexec_exec(void)
 {
 }
diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 4d0c1d1..f4b39a6 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -107,6 +107,12 @@ command:
 .B \-d\ (\-\-debug)
 Enable debugging messages.
 .TP
+.B \-S\ (\-\-status)
+Return 0 if the type (by default crash) is loaded. Can be used in conjunction
+with -l or -p to toggle the type. Note this option supersedes other options
+and it will
+.BR not\ load\ or\ unload\ the\ kernel.


Same as above. I think that you have missed my earlier comments.
I suppose that you can join "+and it will" and "+.BR not\ load\ or\
unload\ the\ kernel." into one line.


+.TP
 .B \-e\ (\-\-exec)
 Run the currently loaded kernel. Note that it will reboot into the loaded 
kernel without calling shutdown(8).
 .TP
diff --git a/kexec/kexec.c b/kexec/kexec.c
index 500e5a9..defbbe3 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -51,6 +51,9 @@
 #include "kexec-lzma.h"
 #include 

+#define KEXEC_LOADED_PATH "/sys/kernel/kexec_loaded"
+#define KEXEC_CRASH_LOADED_PATH "/sys/kernel/kexec_crash_loaded"
+
 unsigned long long mem_min = 0;
 unsigned long long mem_max = ULONG_MAX;
 static unsigned long kexec_flags = 0;
@@ -58,6 +61,8 @@ static unsigned long kexec_flags = 0;
 static unsigned long kexec_file_flags = 0;
 int kexec_debug = 0;


Re: [PATCH v3] kexec: implemented XEN KEXEC STATUS to determine if an image is loaded

2017-01-24 Thread Eric DeVolder

On 01/24/2017 01:16 PM, Daniel Kiper wrote:

On Tue, Jan 24, 2017 at 12:55:35PM -0600, Eric DeVolder wrote:

Instead of the scripts having to poke at various fields we can
provide that functionality via the -S parameter.

kexec_loaded/kexec_crash_loaded exposes Linux kernel kexec/crash
state. It does not say anything about Xen kexec/crash state. So,
we need a special approach to get the latter. Though for
compatibility we provide similar functionality in kexec-tools
for the former.

This change enables the --status or -S option to work either
with or without Xen.

Returns 0 if the payload is loaded. Can be used in combination
with -l or -p to get the state of the proper kexec image.

Signed-off-by: Konrad Rzeszutek Wilk 
Signed-off-by: Eric DeVolder 
---
Note: The corresponding Xen changes have been committed
to the Xen staging branch. Follow this thread:
https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01570.html

CC: Andrew Cooper 
CC: kexec@lists.infradead.org
CC: xen-de...@lists.xenproject.org
CC: Daniel Kiper 

v0: First version (internal product).
v1: Posted on kexec mailing list. Changed -s to -S
v2: Incorporated feedback from kexec mailing list, posted on kexec mailing list
v3: Incorporated feedback from kexec mailing list
---
 configure.ac  |  8 ++-
 kexec/kexec-xen.c | 26 +++
 kexec/kexec.8 |  6 ++
 kexec/kexec.c | 62 ---
 kexec/kexec.h |  5 -
 5 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3044185..c6e864b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -165,8 +165,14 @@ fi
 dnl find Xen control stack libraries
 if test "$with_xen" = yes ; then
AC_CHECK_HEADER(xenctrl.h,
-   [AC_CHECK_LIB(xenctrl, xc_kexec_load, ,
+   [AC_CHECK_LIB(xenctrl, xc_kexec_load, [ have_xenctrl_h=yes ],
AC_MSG_NOTICE([Xen support disabled]))])
+if test "$have_xenctrl_h" = yes ; then
+   AC_CHECK_LIB(xenctrl, xc_kexec_status,
+   AC_DEFINE(HAVE_KEXEC_CMD_STATUS, 1,
+   [The kexec_status call is available]),
+   AC_MSG_NOTICE([The kexec_status call is not available]))
+fi


I have a feeling that you have missed my comment. Please add two TABs
starting from "+if test "$have_xenctrl_h" = yes ; then" up to "+fi".
So, it should look more or less like this:

AC_MSG_NOTICE([Xen support disabled]))])
+   if test "$have_xenctrl_h" = yes ; then
+   AC_CHECK_LIB(xenctrl, xc_kexec_status,
...

If it is not needed or something like that please drop me a line.


The tabs are not needed for configure to work properly.

If tabs are needed for readability/style purposes, I will
add them in. There is no precedent for nesting in
the configure.ac file, so I am unsure what the convention is
for this package.




 fi

 dnl ---Sanity checks
diff --git a/kexec/kexec-xen.c b/kexec/kexec-xen.c
index 24a4191..2b448d3 100644
--- a/kexec/kexec-xen.c
+++ b/kexec/kexec-xen.c
@@ -105,6 +105,27 @@ int xen_kexec_unload(uint64_t kexec_flags)
return ret;
 }

+int xen_kexec_status(uint64_t kexec_flags)
+{
+   xc_interface *xch;
+   uint8_t type;
+   int ret = -1;
+
+#ifdef HAVE_KEXEC_CMD_STATUS
+   xch = xc_interface_open(NULL, NULL, 0);
+   if (!xch)
+   return -1;
+
+   type = (kexec_flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH : 
KEXEC_TYPE_DEFAULT;
+
+   ret = xc_kexec_status(xch, type);
+
+   xc_interface_close(xch);
+#endif
+
+   return ret;
+}
+
 void xen_kexec_exec(void)
 {
xc_interface *xch;
@@ -130,6 +151,11 @@ int xen_kexec_unload(uint64_t kexec_flags)
return -1;
 }

+int xen_kexec_status(uint64_t kexec_flags)
+{
+   return -1;
+}
+
 void xen_kexec_exec(void)
 {
 }
diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 4d0c1d1..f4b39a6 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -107,6 +107,12 @@ command:
 .B \-d\ (\-\-debug)
 Enable debugging messages.
 .TP
+.B \-S\ (\-\-status)
+Return 0 if the type (by default crash) is loaded. Can be used in conjunction
+with -l or -p to toggle the type. Note this option supersedes other options
+and it will
+.BR not\ load\ or\ unload\ the\ kernel.


Same as above. I think that you have missed my earlier comments.
I suppose that you can join "+and it will" and "+.BR not\ load\ or\
unload\ the\ kernel." into one line.


In that file, all dot directives start with the dot in the
first column. I did the same for the .BR in this statement.




+.TP
 .B \-e\ (\-\-exec)
 Run the currently loaded kernel. Note that it will reboot into the loaded 
kernel without calling shutdown(8).
 .TP
diff --git a/kexec/kexec.c b/kexec/kexec.c
index 500e5a9..defbbe3 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -51,6 +51,9 @@
 #include 

[PATCH v3] kexec: implemented XEN KEXEC STATUS to determine if an image is loaded

2017-01-24 Thread Eric DeVolder
Instead of the scripts having to poke at various fields we can
provide that functionality via the -S parameter.

kexec_loaded/kexec_crash_loaded exposes Linux kernel kexec/crash
state. It does not say anything about Xen kexec/crash state. So,
we need a special approach to get the latter. Though for
compatibility we provide similar functionality in kexec-tools
for the former.

This change enables the --status or -S option to work either
with or without Xen.

Returns 0 if the payload is loaded. Can be used in combination
with -l or -p to get the state of the proper kexec image.

Signed-off-by: Konrad Rzeszutek Wilk 
Signed-off-by: Eric DeVolder 
---
Note: The corresponding Xen changes have been committed
to the Xen staging branch. Follow this thread:
https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01570.html

CC: Andrew Cooper 
CC: kexec@lists.infradead.org
CC: xen-de...@lists.xenproject.org
CC: Daniel Kiper 

v0: First version (internal product).
v1: Posted on kexec mailing list. Changed -s to -S
v2: Incorporated feedback from kexec mailing list, posted on kexec mailing list
v3: Incorporated feedback from kexec mailing list
---
 configure.ac  |  8 ++-
 kexec/kexec-xen.c | 26 +++
 kexec/kexec.8 |  6 ++
 kexec/kexec.c | 62 ---
 kexec/kexec.h |  5 -
 5 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3044185..c6e864b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -165,8 +165,14 @@ fi
 dnl find Xen control stack libraries
 if test "$with_xen" = yes ; then
AC_CHECK_HEADER(xenctrl.h,
-   [AC_CHECK_LIB(xenctrl, xc_kexec_load, ,
+   [AC_CHECK_LIB(xenctrl, xc_kexec_load, [ have_xenctrl_h=yes ],
AC_MSG_NOTICE([Xen support disabled]))])
+if test "$have_xenctrl_h" = yes ; then
+   AC_CHECK_LIB(xenctrl, xc_kexec_status,
+   AC_DEFINE(HAVE_KEXEC_CMD_STATUS, 1,
+   [The kexec_status call is available]),
+   AC_MSG_NOTICE([The kexec_status call is not available]))
+fi
 fi
 
 dnl ---Sanity checks
diff --git a/kexec/kexec-xen.c b/kexec/kexec-xen.c
index 24a4191..2b448d3 100644
--- a/kexec/kexec-xen.c
+++ b/kexec/kexec-xen.c
@@ -105,6 +105,27 @@ int xen_kexec_unload(uint64_t kexec_flags)
return ret;
 }
 
+int xen_kexec_status(uint64_t kexec_flags)
+{
+   xc_interface *xch;
+   uint8_t type;
+   int ret = -1;
+
+#ifdef HAVE_KEXEC_CMD_STATUS
+   xch = xc_interface_open(NULL, NULL, 0);
+   if (!xch)
+   return -1;
+
+   type = (kexec_flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH : 
KEXEC_TYPE_DEFAULT;
+
+   ret = xc_kexec_status(xch, type);
+
+   xc_interface_close(xch);
+#endif
+
+   return ret;
+}
+
 void xen_kexec_exec(void)
 {
xc_interface *xch;
@@ -130,6 +151,11 @@ int xen_kexec_unload(uint64_t kexec_flags)
return -1;
 }
 
+int xen_kexec_status(uint64_t kexec_flags)
+{
+   return -1;
+}
+
 void xen_kexec_exec(void)
 {
 }
diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 4d0c1d1..f4b39a6 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -107,6 +107,12 @@ command:
 .B \-d\ (\-\-debug)
 Enable debugging messages.
 .TP
+.B \-S\ (\-\-status)
+Return 0 if the type (by default crash) is loaded. Can be used in conjunction
+with -l or -p to toggle the type. Note this option supersedes other options
+and it will
+.BR not\ load\ or\ unload\ the\ kernel.
+.TP
 .B \-e\ (\-\-exec)
 Run the currently loaded kernel. Note that it will reboot into the loaded 
kernel without calling shutdown(8).
 .TP
diff --git a/kexec/kexec.c b/kexec/kexec.c
index 500e5a9..defbbe3 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -51,6 +51,9 @@
 #include "kexec-lzma.h"
 #include 
 
+#define KEXEC_LOADED_PATH "/sys/kernel/kexec_loaded"
+#define KEXEC_CRASH_LOADED_PATH "/sys/kernel/kexec_crash_loaded"
+
 unsigned long long mem_min = 0;
 unsigned long long mem_max = ULONG_MAX;
 static unsigned long kexec_flags = 0;
@@ -58,6 +61,8 @@ static unsigned long kexec_flags = 0;
 static unsigned long kexec_file_flags = 0;
 int kexec_debug = 0;
 
+static int kexec_loaded(const char *file);
+
 void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr)
 {
int i;
@@ -890,8 +895,6 @@ static int my_exec(void)
return -1;
 }
 
-static int kexec_loaded(void);
-
 static int load_jump_back_helper_image(unsigned long kexec_flags, void *entry)
 {
int result;
@@ -909,7 +912,7 @@ static int my_load_jump_back_helper(unsigned long 
kexec_flags, void *entry)
 {
int result;
 
-   if (kexec_loaded()) {
+   if (kexec_loaded(KEXEC_LOADED_PATH)) {
fprintf(stderr, "There is kexec kernel loaded, make sure "
"you are in kexeced kernel.\n");
return -1;
@@ 

Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-24 Thread Tony Luck
On Tue, Jan 24, 2017 at 10:11 AM, Hari Bathini
 wrote:

> Hello IA64 folks,
>
> Could you please review this patch..?

It looks OK in principle.  My lab is in partial disarray at the
moment (just got back from a sabbatical) so I can't test
build and boot. Have you cross-compiled it (or gotten a successful
build report from zero-day)?

If you have ... then add an Acked-by: Tony Luck 

-Tony

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-24 Thread Hari Bathini



On Tuesday 17 January 2017 10:36 PM, Hari Bathini wrote:



On Friday 06 January 2017 07:33 AM, Dave Young wrote:

On 01/05/17 at 11:01pm, Hari Bathini wrote:

Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.

Signed-off-by: Hari Bathini 
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


  arch/ia64/kernel/crash.c   |   22 --
  include/linux/crash_core.h |4 
  include/linux/elf.h|2 ++
  kernel/crash_core.c|   34 ++
  kernel/kexec_core.c|   28 
  5 files changed, 20 insertions(+), 70 deletions(-)

diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 2955f35..75859a0 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -27,28 +27,6 @@ static int kdump_freeze_monarch;
  static int kdump_on_init = 1;
  static int kdump_on_fatal_mca = 1;
  -static inline Elf64_Word
-*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void 
*data,

-size_t data_len)
-{
-struct elf_note *note = (struct elf_note *)buf;
-note->n_namesz = strlen(name) + 1;
-note->n_descsz = data_len;
-note->n_type   = type;
-buf += (sizeof(*note) + 3)/4;
-memcpy(buf, name, note->n_namesz);
-buf += (note->n_namesz + 3)/4;
-memcpy(buf, data, data_len);
-buf += (data_len + 3)/4;
-return buf;
-}
-
-static void
-final_note(void *buf)
-{
-memset(buf, 0, sizeof(struct elf_note));
-}
-
  extern void ia64_dump_cpu_regs(void *);
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 18d0f94..541a197 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -55,6 +55,10 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
  extern size_t vmcoreinfo_size;
  extern size_t vmcoreinfo_max_size;
  +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int 
type,

+  void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+
  int __init parse_crashkernel(char *cmdline, unsigned long long 
system_ram,
  unsigned long long *crash_size, unsigned long long 
*crash_base);
  int parse_crashkernel_high(char *cmdline, unsigned long long 
system_ram,

diff --git a/include/linux/elf.h b/include/linux/elf.h
index 20fa8d8..ba069e8 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC [];
  #define elf_noteelf32_note
  #define elf_addr_tElf32_Off
  #define Elf_HalfElf32_Half
+#define Elf_WordElf32_Word
#else
  @@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC [];
  #define elf_noteelf64_note
  #define elf_addr_tElf64_Off
  #define Elf_HalfElf64_Half
+#define Elf_WordElf64_Word
#endif
  diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 80b441d..362dace 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -291,32 +291,26 @@ int __init parse_crashkernel_low(char *cmdline,
  "crashkernel=", suffix_tbl[SUFFIX_LOW]);
  }
  -static u32 *append_elf_note(u32 *buf, char *name, unsigned int type,
-void *data, size_t data_len)
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int 
type,

+  void *data, size_t data_len)
  {
-struct elf_note note;
-
-note.n_namesz = strlen(name) + 1;
-note.n_descsz = data_len;
-note.n_type   = type;
-memcpy(buf, , sizeof(note));
-buf += (sizeof(note) + 3)/4;
-memcpy(buf, name, note.n_namesz);
-buf += (note.n_namesz + 3)/4;
-memcpy(buf, data, note.n_descsz);
-buf += (note.n_descsz + 3)/4;
+struct elf_note *note = (struct elf_note *)buf;
+
+note->n_namesz = strlen(name) + 1;
+note->n_descsz = data_len;
+note->n_type   = type;
+buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
+memcpy(buf, name, note->n_namesz);
+buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
+memcpy(buf, data, data_len);
+buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
return buf;
  }
  -static void final_note(u32 *buf)
+void final_note(Elf_Word *buf)
  {
-struct elf_note note;
-
-note.n_namesz = 0;
-note.n_descsz = 0;
-note.n_type   = 0;
-memcpy(buf, , sizeof(note));
+memset(buf, 0, sizeof(struct elf_note));
  }
static void update_vmcoreinfo_note(void)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 2179a16..263d764 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -990,34 +990,6 @@ int crash_shrink_memory(unsigned long new_size)
  return ret;
  }
  -static u32 *append_elf_note(u32 *buf, char 

Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-24 Thread Hari Bathini



On Friday 20 January 2017 11:17 AM, Michael Ellerman wrote:

Hari Bathini  writes:


Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.

Signed-off-by: Hari Bathini 
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


  arch/ia64/kernel/crash.c   |   22 --
  include/linux/crash_core.h |4 
  include/linux/elf.h|2 ++
  kernel/crash_core.c|   34 ++
  kernel/kexec_core.c|   28 
  5 files changed, 20 insertions(+), 70 deletions(-)

Do the powerpc patches later in the series actually depend on this one?
Or is this just an unrelated cleanup?

As it is I can't merge the series until we at least get an ack on this
from the ia64 folks.

If you can just split this out as a separate patch that would make it a
lot easier to get the rest merged.



Hi Michael,

append_elf_note() & final_note() functions were defined statically at 
three different places,
arch/powerpc/kernel/fadump.c being one of them. With my changes, I would 
need to add
a fourth static definition if I ignore this cleanup. So, I preferred to 
clean this up...


Let me ping IA64 folks one last time. Will do a respin without the 
cleanup if I don't get

any response from them by end of this week..

Thanks
Hari


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2] kexec: implemented XEN KEXEC STATUS to determine if an image is loaded

2017-01-24 Thread Konrad Rzeszutek Wilk
On Tue, Jan 24, 2017 at 02:35:17PM +0100, Simon Horman wrote:
> On Fri, Jan 20, 2017 at 11:03:54AM -0600, Eric DeVolder wrote:
> > Instead of the scripts having to poke at various fields we can
> > provide that functionality via the -S parameter.
> > 
> > Returns 0 if the payload is loaded. Can be used in combination
> > with -l or -p to get the state of the proper kexec image.
> > 
> > Signed-off-by: Konrad Rzeszutek Wilk 
> > Signed-off-by: Eric DeVolder 
> > ---
> > Note: The corresponding Xen changes have been committed
> > to the Xen staging branch. Follow this thread:
> > https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01570.html
> 
> Hi,
> 
> this change seems worthwhile to me but am I correct in thinking that
> it adds a new -S option that works both with and without Xen?

Correct.
> If so I'd like that reflected in the changelog.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3] kexec: Increase the upper limit for RAM segments

2017-01-24 Thread Simon Horman
On Wed, Jan 18, 2017 at 04:15:12PM -0700, Sameer Goel wrote:
> On a newer UEFI based Qualcomm target the number of system ram regions
> retrieved from /proc/iomem are ~40. So increasing the current hardcoded
> values to 64 from 16.
> 
> Signed-off-by: Sameer Goel 

Thanks, applied.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] alpha: add missing __NR_kexec_load definition

2017-01-24 Thread Simon Horman
On Tue, Dec 20, 2016 at 09:29:18AM +0100, Simon Horman wrote:
> Signed-off-by: Simon Horman 
> ---
>  kexec/kexec-syscall.h | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h
> index c0d0beadf932..3b5c528d8aac 100644
> --- a/kexec/kexec-syscall.h
> +++ b/kexec/kexec-syscall.h
> @@ -48,6 +48,9 @@
>  #ifdef __m68k__
>  #define __NR_kexec_load313
>  #endif
> +#ifdef __alpha__
> +#define __NR_kexec_load448
> +#endif
>  #ifndef __NR_kexec_load
>  #error Unknown processor architecture.  Needs a kexec_load syscall number.
>  #endif

I have applied this but for the record alpha still seems far away from
compiling in my environment.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2] kexec: implemented XEN KEXEC STATUS to determine if an image is loaded

2017-01-24 Thread Simon Horman
On Fri, Jan 20, 2017 at 11:03:54AM -0600, Eric DeVolder wrote:
> Instead of the scripts having to poke at various fields we can
> provide that functionality via the -S parameter.
> 
> Returns 0 if the payload is loaded. Can be used in combination
> with -l or -p to get the state of the proper kexec image.
> 
> Signed-off-by: Konrad Rzeszutek Wilk 
> Signed-off-by: Eric DeVolder 
> ---
> Note: The corresponding Xen changes have been committed
> to the Xen staging branch. Follow this thread:
> https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01570.html

Hi,

this change seems worthwhile to me but am I correct in thinking that
it adds a new -S option that works both with and without Xen?
If so I'd like that reflected in the changelog.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] ppc64: Reduce number of ELF LOAD segments

2017-01-24 Thread Simon Horman
On Thu, Jan 19, 2017 at 06:37:09PM +0100, Petr Tesarik wrote:
> The number of program header table entries (e_phnum) is an Elf64_Half,
> which is a 16-bit entity, i.e. the limit is 65534 entries (one entry is
> reserved for NOTE). This is a hard limit, defined by the ELF standard.
> It is possible that more LMBs (Logical Memory Blocks) are needed to
> represent all RAM on some machines, and this field overflows, causing
> an incomplete /proc/vmcore file.
> 
> This has actually happened on a machine with 31TB of RAM and an LMB size
> of 256MB.
> 
> However, since there is usually no memory hole between adjacent LMBs, the
> map can be "compressed", combining multiple adjacent into a single LOAD
> segment.
> 
> Signed-off-by: Petr Tesarik 

Thanks, applied.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86/mce: Keep quiet in case of broadcasted mce after system panic

2017-01-24 Thread Borislav Petkov
On Tue, Jan 24, 2017 at 09:27:45AM +0800, Xunlei Pang wrote:
> It occurred on real hardware when testing crash dump.
> 
> 1) SysRq-c was injected for the test in 1st kernel
> [ 49.897279] SysRq : Trigger a crash
> 2) The 2nd kernel started for kdump
>[ 0.00] Command line: BOOT_IMAGE=/vmlinuz-3.10.0-229.el7.x86_64 
> root=UUID=976a15c8-8cbe-44ad-bb91-23f9b18e8789

Yeah, no, I'm not debugging the RH Frankenstein kernel.

Please retrigger this with latest tip/master first.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86/mce: Keep quiet in case of broadcasted mce after system panic

2017-01-24 Thread Xunlei Pang
On 01/23/2017 at 10:50 PM, Borislav Petkov wrote:
> On Mon, Jan 23, 2017 at 09:35:53PM +0800, Xunlei Pang wrote:
>> One possible timing sequence would be:
>> 1st kernel running on multiple cpus panicked
>> then the crash dump code starts
>> the crash dump code stops the others cpus except the crashing one
>> 2nd kernel boots up on the crash cpu with "nr_cpus=1"
>> some broadcasted mce comes on some cpu amongst the other cpus(not the 
>> crashing cpu)
> Where does this broadcasted MCE come from?
>
> The crash dump code triggered it? Or it happened before the panic()?
>
> Are you talking about an *actual* sequence which you're experiencing on
> real hw or is this something hypothetical?
>

It occurred on real hardware when testing crash dump.

1) SysRq-c was injected for the test in 1st kernel
[ 49.897279] SysRq : Trigger a crash

2) The 2nd kernel started for kdump
[ 0.00] Command line: BOOT_IMAGE=/vmlinuz-3.10.0-229.el7.x86_64 
root=UUID=976a15c8-8cbe-44ad-bb91-23f9b18e8789 ro console=ttyS1,115200 
nmi_watchdog=0 irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off 
numa=off udev.children-max=2 panic=10 rootflags=nofail acpi_no_memhotplug 
disable_cpu_apicid=0 elfcorehdr=869772K

3) An MCE came to the 1st kernel, timeout panic occurred, and rebooted the machine
[6.095706] Dazed and confused, but trying to continue  // message of 
the 1st kernel
[   81.655507] Kernel panic - not syncing: Timeout synchronizing machine 
check over CPUs
[   82.729324] Shutting down cpus with NMI
[   82.774539] drm_kms_helper: panic occurred, switching back to text 
console
[   82.782257] Rebooting in 10 seconds..

Please see the attached for the full log. Regards, Xunlei

[   49.897279] SysRq : Trigger a crash 
[   49.901218] BUG: unable to handle kernel NULL pointer dereference at 
  (null) 
[   49.909988] IP: [] sysrq_handle_crash+0x16/0x20 
[   49.916805] PGD 868add067 PUD 867139067 PMD 0  
[   49.921805] Oops: 0002 [#1] SMP  
[   49.925432] Modules linked in: ipmi_devintf intel_powerclamp coretemp 
intel_rapl kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel 
ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd 
iTCO_wdt sb_edac iTCO_vendor_support ntb mei_me pcspkr edac_core ioatdma 
lpc_ich i2c_i801 ipmi_si mei mfd_core shpchp dca ipmi_msghandler acpi_pad 
acpi_power_meter xfs sd_mod sr_mod crc_t10dif cdrom crct10dif_common 
usb_storage mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit 
drm_kms_helper ata_generic ttm bnx2x pata_acpi mdio drm ata_piix ptp libata 
i2c_core pps_core libcrc32c 
[   49.984994] CPU: 9 PID: 9463 Comm: do-test.sh Not tainted 
3.10.0-229.el7.x86_64 #1 
[   49.993456] Hardware name: NEC Express5800/B120d-h [N8400-126Y]/G7LDV, BIOS 
4.6.2013 10/24/2012 
[   50.003164] task: 88043370 ti: 8808653b8000 task.ti: 
8808653b8000 
[   50.011514] RIP: 0010:[]  [] 
sysrq_handle_crash+0x16/0x20 
[   50.021045] RSP: 0018:8808653bbe80  EFLAGS: 00010046 
[   50.026976] RAX: 000f RBX: 819c18a0 RCX: 
 
[   50.034939] RDX:  RSI: 88087fc2d488 RDI: 
0063 
[   50.042908] RBP: 8808653bbe80 R08: 0092 R09: 
0608 
[   50.050870] R10: 0607 R11: 0003 R12: 
0063 
[   50.058837] R13: 0246 R14: 0007 R15: 
 
[   50.066799] FS:  7f0faaf54740() GS:88087fc2() 
knlGS: 
[   50.075828] CS:  0010 DS:  ES:  CR0: 80050033 
[   50.082244] CR2:  CR3: 000866d07000 CR4: 
000407e0 
[   50.090212] DR0:  DR1:  DR2: 
 
[   50.098173] DR3:  DR6: 0ff0 DR7: 
0400 
[   50.106133] Stack: 
[   50.108388]  8808653bbeb8 81397c32 0002 
7f0faaf58000 
[   50.116671]  8808653bbf48 0002  
8808653bbed0 
[   50.124963]  8139810f 8804674a6540 8808653bbef0 
8122de0d 
[   50.133257] Call Trace: 
[   50.135993]  [] __handle_sysrq+0xa2/0x170 
[   50.142219]  [] write_sysrq_trigger+0x2f/0x40 
[   50.148841]  [] pro c_reg_write+0x3] Code: eb 9b 45 01 f4 
45 39 65 34 75 e5 4c 89 ef e8 e2 f7 ff ff eb db 66 66 66 66 90 55 c7 05 50 d7 
59 00 01 00 00 00 48 89 e5 0f ae f8  04 25 00 00 00 00 01 5d c3 66 66 66 66 
90 55 31 c0 c7 05 ce  
[   50.194758] RIP  [] sysrq_handle_crash+0x16/0x20 
[   50.201669]  RSP  
[   50.205558] CR2:  
[0.00] Initializing cgroup subsys cpuset 
[0.00] Initializing cgroup subsys cpu 
[0.00] Initializing cgroup subsys cpuacct 
[0.00] Linux version 3.10.0-229.el7.x86_64 
(mockbu...@x86-035.build.eng.bos.redhat.com) (gcc version 4.8.3 20140911 (Red 
Hat 4.8.3-7) (GCC) ) #1 SMP Thu Jan 29 18:37:38 EST 2015 
[0.00] Command line: BOOT_IMAGE=/vmlinuz-3.10.0-229.el7.x86_64 

Re: [PATCH v30 04/11] arm64: mm: allow for unmapping memory region from kernel mapping

2017-01-24 Thread Pratyush Anand



On Tuesday 24 January 2017 02:19 PM, AKASHI Takahiro wrote:

The current implementation of create_mapping_late() is only allowed
to modify permission attributes (read-only or read-write) against
the existing kernel mapping.

In this patch, PAGE_KERNEL_INVALID protection attribute is introduced.
We will now be able to invalidate (or unmap) some part of the existing
kernel mapping by specifying PAGE_KERNEL_INVALID to create_mapping_late().

This feature will be used in a succeeding kdump patch to protect
the memory reserved for crash dump kernel once after loaded.

Signed-off-by: AKASHI Takahiro 
---
 arch/arm64/include/asm/mmu.h   |  2 ++
 arch/arm64/include/asm/pgtable-hwdef.h |  2 ++
 arch/arm64/include/asm/pgtable-prot.h  |  1 +
 arch/arm64/include/asm/pgtable.h   |  4 
 arch/arm64/mm/mmu.c| 29 -
 5 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 47619411f0ff..a6c1367527bc 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -36,6 +36,8 @@ extern void init_mem_pgprot(void);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
   unsigned long virt, phys_addr_t size,
   pgprot_t prot, bool page_mappings_only);
+extern void create_mapping_late(phys_addr_t phys, unsigned long virt,
+   phys_addr_t size, pgprot_t prot);
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys);

 #endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd90de9..e66efec31ca9 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -119,6 +119,7 @@
 #define PUD_TABLE_BIT  (_AT(pgdval_t, 1) << 1)
 #define PUD_TYPE_MASK  (_AT(pgdval_t, 3) << 0)
 #define PUD_TYPE_SECT  (_AT(pgdval_t, 1) << 0)
+#define PUD_VALID  PUD_TYPE_SECT

 /*
  * Level 2 descriptor (PMD).
@@ -128,6 +129,7 @@
 #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT  (_AT(pmdval_t, 1) << 0)
 #define PMD_TABLE_BIT  (_AT(pmdval_t, 1) << 1)
+#define PMD_VALID  PMD_TYPE_SECT

 /*
  * Section
diff --git a/arch/arm64/include/asm/pgtable-prot.h 
b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..945d84cd5df7 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -54,6 +54,7 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_DEFAULT | PTE_UXN | 
PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC   __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | 
PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_CONT)
+#define PAGE_KERNEL_INVALID__pgprot(0)

 #define PAGE_HYP   __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
 #define PAGE_HYP_EXEC  __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ffbb9a520563..1904a7c07018 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -364,6 +364,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, 
unsigned long pfn,

 #define pmd_bad(pmd)   (!(pmd_val(pmd) & PMD_TABLE_BIT))

+#define pmd_valid(pmd) (!!(pmd_val(pmd) & PMD_VALID))
+
 #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == \
@@ -428,6 +430,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)

 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)   (!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_valid(pud) (!!(pud_val(pud) & PUD_VALID))


This will break compilation for CONFIG_PGTABLE_LEVELS <= 2


 #define pud_present(pud)   (pud_val(pud))

 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -481,6 +484,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)

 #define pgd_none(pgd)  (!pgd_val(pgd))
 #define pgd_bad(pgd)   (!(pgd_val(pgd) & 2))
+#define pgd_valid(pgd) (!!(pgd_val(pgd) & 1))


This has not been used anywhere.


 #define pgd_present(pgd)   (pgd_val(pgd))

 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 17243e43184e..9c7adcce8e4e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -133,7 +133,8 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 * Set the contiguous bit for the subsequent group of PTEs if
 * its size and alignment are appropriate.
 */
-   if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) {
+   if ((pgprot_val(prot) & PTE_VALID) &&
+  

[PATCH v30 05/11] arm64: kdump: protect crash dump kernel memory

2017-01-24 Thread AKASHI Takahiro
To protect the memory reserved for crash dump kernel once after loaded,
arch_kexec_protect_crashres/unprotect_crashres() are meant to deal with
permissions of the corresponding kernel mappings.

We also have to
- put the region in an isolated mapping, and
- move copying kexec's control_code_page to machine_kexec_prepare()
so that the region will be completely read-only after loading.

Note that the region must reside in linear mapping and have corresponding
page structures in order to be potentially freed by shrinking it through
/sys/kernel/kexec_crash_size.

Signed-off-by: AKASHI Takahiro 
---
 arch/arm64/kernel/machine_kexec.c | 68 +--
 arch/arm64/mm/mmu.c   | 34 
 2 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec.c 
b/arch/arm64/kernel/machine_kexec.c
index bc96c8a7fc79..f7938fecf3ff 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -14,6 +14,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #include "cpu-reset.h"
@@ -22,8 +23,6 @@
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;
 
-static unsigned long kimage_start;
-
 /**
  * kexec_image_info - For debugging output.
  */
@@ -64,7 +63,7 @@ void machine_kexec_cleanup(struct kimage *kimage)
  */
 int machine_kexec_prepare(struct kimage *kimage)
 {
-   kimage_start = kimage->start;
+   void *reboot_code_buffer;
 
kexec_image_info(kimage);
 
@@ -73,6 +72,21 @@ int machine_kexec_prepare(struct kimage *kimage)
return -EBUSY;
}
 
+   reboot_code_buffer =
+   phys_to_virt(page_to_phys(kimage->control_code_page));
+
+   /*
+* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+* after the kernel is shut down.
+*/
+   memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+   arm64_relocate_new_kernel_size);
+
+   /* Flush the reboot_code_buffer in preparation for its execution. */
+   __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+   flush_icache_range((uintptr_t)reboot_code_buffer,
+   arm64_relocate_new_kernel_size);
+
return 0;
 }
 
@@ -143,7 +157,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
 void machine_kexec(struct kimage *kimage)
 {
phys_addr_t reboot_code_buffer_phys;
-   void *reboot_code_buffer;
 
/*
 * New cpus may have become stuck_in_kernel after we loaded the image.
@@ -151,7 +164,6 @@ void machine_kexec(struct kimage *kimage)
BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
 
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
-   reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
 
kexec_image_info(kimage);
 
@@ -159,32 +171,20 @@ void machine_kexec(struct kimage *kimage)
kimage->control_code_page);
pr_debug("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
_code_buffer_phys);
-   pr_debug("%s:%d: reboot_code_buffer:   %p\n", __func__, __LINE__,
-   reboot_code_buffer);
pr_debug("%s:%d: relocate_new_kernel:  %p\n", __func__, __LINE__,
arm64_relocate_new_kernel);
pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
__func__, __LINE__, arm64_relocate_new_kernel_size,
arm64_relocate_new_kernel_size);
 
-   /*
-* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
-* after the kernel is shut down.
-*/
-   memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
-   arm64_relocate_new_kernel_size);
-
-   /* Flush the reboot_code_buffer in preparation for its execution. */
-   __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
-   flush_icache_range((uintptr_t)reboot_code_buffer,
-   arm64_relocate_new_kernel_size);
-
-   /* Flush the kimage list and its buffers. */
-   kexec_list_flush(kimage);
+   if (kimage != kexec_crash_image) {
+   /* Flush the kimage list and its buffers. */
+   kexec_list_flush(kimage);
 
-   /* Flush the new image if already in place. */
-   if (kimage->head & IND_DONE)
-   kexec_segment_flush(kimage);
+   /* Flush the new image if already in place. */
+   if (kimage->head & IND_DONE)
+   kexec_segment_flush(kimage);
+   }
 
pr_info("Bye!\n");
 
@@ -201,7 +201,7 @@ void machine_kexec(struct kimage *kimage)
 */
 
cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
-   kimage_start, 0);
+   kimage->start, 0);
 
BUG(); /* Should never get here. */
 }
@@ -210,3 +210,21 @@ void 

[PATCH v30 11/11] Documentation: dt: chosen properties for arm64 kdump

2017-01-24 Thread AKASHI Takahiro
From: James Morse 

Add documentation for DT properties:
linux,usable-memory-range
linux,elfcorehdr
used by arm64 kdump. Those describe the usable memory range for crash dump
kernel and the elfcorehdr's location within it, respectively.

Signed-off-by: James Morse 
[takahiro.aka...@linaro.org: added "linux,crashkernel-base" and "-size" ]
Signed-off-by: AKASHI Takahiro 
Acked-by: Mark Rutland 
Cc: devicet...@vger.kernel.org
Cc: Rob Herring 
---
 Documentation/devicetree/bindings/chosen.txt | 37 
 1 file changed, 37 insertions(+)

diff --git a/Documentation/devicetree/bindings/chosen.txt 
b/Documentation/devicetree/bindings/chosen.txt
index 6ae9d82d4c37..8dc82431acc1 100644
--- a/Documentation/devicetree/bindings/chosen.txt
+++ b/Documentation/devicetree/bindings/chosen.txt
@@ -52,3 +52,40 @@ This property is set (currently only on PowerPC, and only 
needed on
 book3e) by some versions of kexec-tools to tell the new kernel that it
 is being booted by kexec, as the booting environment may differ (e.g.
 a different secondary CPU release mechanism)
+
+linux,usable-memory-range
+-
+
+This property (arm64 only) holds a base address and size, describing a
+limited region in which memory may be considered available for use by
+the kernel. Memory outside of this range is not available for use.
+
+This property describes a limitation: memory within this range is only
+valid when also described through another mechanism that the kernel
+would otherwise use to determine available memory (e.g. memory nodes
+or the EFI memory map). Valid memory may be sparse within the range.
+e.g.
+
+/ {
+   chosen {
+   linux,usable-memory-range = <0x9 0xf000 0x0 0x1000>;
+   };
+};
+
+The main usage is for crash dump kernel to identify its own usable
+memory and exclude, at its boot time, any other memory areas that are
+part of the panicked kernel's memory.
+
+linux,elfcorehdr
+
+
+This property (currently used only on arm64) holds the memory range,
+the address and the size, of the elf core header which mainly describes
+the panicked kernel's memory layout as PT_LOAD segments of elf format.
+e.g.
+
+/ {
+   chosen {
+   linux,elfcorehdr = <0x9 0xf000 0x0 0x800>;
+   };
+};
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 10/11] Documentation: kdump: describe arm64 port

2017-01-24 Thread AKASHI Takahiro
Add arch specific descriptions about kdump usage on arm64 to kdump.txt.

Signed-off-by: AKASHI Takahiro 
Reviewed-by: Baoquan He 
Acked-by: Dave Young 
Acked-by: Catalin Marinas 
---
 Documentation/kdump/kdump.txt | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index b0eb27b956d9..615434d81108 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the 
network to
 a remote system.
 
 Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x and arm architectures.
+s390x, arm and arm64 architectures.
 
 When the system kernel boots, it reserves a small section of memory for
 the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm)
 
 AUTO_ZRELADDR=y
 
+Dump-capture kernel config options (Arch Dependent, arm64)
+--
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+  on non-VHE systems even if it is configured. This is because the CPU
+  will not be reset to EL2 on panic.
+
 Extended crashkernel syntax
 ===
 
@@ -305,6 +312,8 @@ Boot into System Kernel
kernel will automatically locate the crash kernel image within the
first 512MB of RAM if X is not given.
 
+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
 
 Load the Dump-capture Kernel
 
@@ -327,6 +336,8 @@ For s390x:
- Use image or bzImage
 For arm:
- Use zImage
+For arm64:
+   - Use vmlinux or Image
 
 If you are using a uncompressed vmlinux image then use following command
 to load dump-capture kernel.
@@ -370,6 +381,9 @@ For s390x:
 For arm:
"1 maxcpus=1 reset_devices"
 
+For arm64:
+   "1 maxcpus=1 reset_devices"
+
 Notes on loading the dump-capture kernel:
 
 * By default, the ELF headers are stored in ELF64 format to support
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 09/11] arm64: kdump: enable kdump in defconfig

2017-01-24 Thread AKASHI Takahiro
Kdump is enabled by default as kexec is.

Signed-off-by: AKASHI Takahiro 
Acked-by: Catalin Marinas 
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 33b744d54739..94c2ea523a8a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -79,6 +79,7 @@ CONFIG_CMA=y
 CONFIG_SECCOMP=y
 CONFIG_XEN=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
 CONFIG_CPU_IDLE=y
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 07/11] arm64: kdump: add VMCOREINFO's for user-space tools

2017-01-24 Thread AKASHI Takahiro
In addition to common VMCOREINFO's defined in
crash_save_vmcoreinfo_init(), we need to know, for crash utility,
  - kimage_voffset
  - PHYS_OFFSET
to examine the contents of a dump file (/proc/vmcore) correctly
due to the introduction of KASLR (CONFIG_RANDOMIZE_BASE) in v4.6.

  - VA_BITS
is also required for makedumpfile command.

arch_crash_save_vmcoreinfo() appends them to the dump file.
More VMCOREINFO's may be added later.

Signed-off-by: AKASHI Takahiro 
Reviewed-by: James Morse 
Acked-by: Catalin Marinas 
---
 arch/arm64/kernel/machine_kexec.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/kernel/machine_kexec.c 
b/arch/arm64/kernel/machine_kexec.c
index d56ea8c805a8..84c5761af336 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -17,6 +17,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -275,3 +276,13 @@ void arch_kexec_unprotect_crashkres(void)
 
flush_tlb_all();
 }
+
+void arch_crash_save_vmcoreinfo(void)
+{
+   VMCOREINFO_NUMBER(VA_BITS);
+   /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+   vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
+   kimage_voffset);
+   vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
+   PHYS_OFFSET);
+}
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 08/11] arm64: kdump: provide /proc/vmcore file

2017-01-24 Thread AKASHI Takahiro
Add arch-specific functions to provide a dump file, /proc/vmcore.

This file is in ELF format and its ELF header needs to be prepared by
userspace tools, like kexec-tools, in advance. The primary kernel is
responsible to allocate the region with reserve_elfcorehdr() at boot time
and advertize its location to crash dump kernel via a new device-tree
property, "linux,elfcorehdr".

Then crash dump kernel will access the primary kernel's memory with
copy_oldmem_page(), which feeds the data page-by-page by ioremap'ing it
since it does not reside in linear mapping on crash dump kernel.

We also need our own elfcorehdr_read() here since the header is placed
within crash dump kernel's usable memory.

Signed-off-by: AKASHI Takahiro 
Reviewed-by: James Morse 
Acked-by: Catalin Marinas 
---
 arch/arm64/Kconfig | 11 +++
 arch/arm64/kernel/Makefile |  1 +
 arch/arm64/kernel/crash_dump.c | 71 ++
 arch/arm64/mm/init.c   | 53 +++
 4 files changed, 136 insertions(+)
 create mode 100644 arch/arm64/kernel/crash_dump.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 111742126897..2bd6a1a062b9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -693,6 +693,17 @@ config KEXEC
  but it is independent of the system firmware.   And like a reboot
  you can start any kernel with it, not just Linux.
 
+config CRASH_DUMP
+   bool "Build kdump crash kernel"
+   help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/kdump/kdump.txt
+
 config XEN_DOM0
def_bool y
depends on XEN
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d66bbaafc0c..6a7384eee08d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,6 +50,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE)+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)  += machine_kexec.o relocate_kernel.o
\
   cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 
 obj-y  += $(arm64-obj-y) vdso/ probes/
 obj-m  += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index ..c3d5a21c081e
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+size_t csize, unsigned long offset,
+int userbuf)
+{
+   void *vaddr;
+
+   if (!csize)
+   return 0;
+
+   vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+   if (!vaddr)
+   return -ENOMEM;
+
+   if (userbuf) {
+   if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+   memunmap(vaddr);
+   return -EFAULT;
+   }
+   } else {
+   memcpy(buf, vaddr + offset, csize);
+   }
+
+   memunmap(vaddr);
+
+   return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @count: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+   memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+   return count;
+}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2aba75dc7720..323b87197e18 100644
--- 

[PATCH v30 06/11] arm64: kdump: implement machine_crash_shutdown()

2017-01-24 Thread AKASHI Takahiro
Primary kernel calls machine_crash_shutdown() to shut down non-boot cpus
and save registers' status in per-cpu ELF notes before starting crash
dump kernel. See kernel_kexec().
Even if not all secondary cpus have shut down, we do kdump anyway.

As we don't have to make non-boot(crashed) cpus offline (to preserve
correct status of cpus at crash dump) before shutting down, this patch
also adds a variant of smp_send_stop().

Signed-off-by: AKASHI Takahiro 
Reviewed-by: James Morse 
Acked-by: Catalin Marinas 
---
 arch/arm64/include/asm/hardirq.h  |  2 +-
 arch/arm64/include/asm/kexec.h| 42 +-
 arch/arm64/include/asm/smp.h  |  2 ++
 arch/arm64/kernel/machine_kexec.c | 55 +++---
 arch/arm64/kernel/smp.c   | 63 +++
 5 files changed, 158 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 8740297dac77..1473fc2f7ab7 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include 
 #include 
 
-#define NR_IPI 6
+#define NR_IPI 7
 
 typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 04744dc5fb61..f40ace1fa21a 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -40,7 +40,47 @@
 static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs)
 {
-   /* Empty routine needed to avoid build errors. */
+   if (oldregs) {
+   memcpy(newregs, oldregs, sizeof(*newregs));
+   } else {
+   u64 tmp1, tmp2;
+
+   __asm__ __volatile__ (
+   "stp x0,   x1, [%2, #16 *  0]\n"
+   "stp x2,   x3, [%2, #16 *  1]\n"
+   "stp x4,   x5, [%2, #16 *  2]\n"
+   "stp x6,   x7, [%2, #16 *  3]\n"
+   "stp x8,   x9, [%2, #16 *  4]\n"
+   "stpx10,  x11, [%2, #16 *  5]\n"
+   "stpx12,  x13, [%2, #16 *  6]\n"
+   "stpx14,  x15, [%2, #16 *  7]\n"
+   "stpx16,  x17, [%2, #16 *  8]\n"
+   "stpx18,  x19, [%2, #16 *  9]\n"
+   "stpx20,  x21, [%2, #16 * 10]\n"
+   "stpx22,  x23, [%2, #16 * 11]\n"
+   "stpx24,  x25, [%2, #16 * 12]\n"
+   "stpx26,  x27, [%2, #16 * 13]\n"
+   "stpx28,  x29, [%2, #16 * 14]\n"
+   "mov %0,  sp\n"
+   "stpx30,  %0,  [%2, #16 * 15]\n"
+
+   "/* faked current PSTATE */\n"
+   "mrs %0, CurrentEL\n"
+   "mrs %1, SPSEL\n"
+   "orr %0, %0, %1\n"
+   "mrs %1, DAIF\n"
+   "orr %0, %0, %1\n"
+   "mrs %1, NZCV\n"
+   "orr %0, %0, %1\n"
+   /* pc */
+   "adr %1, 1f\n"
+   "1:\n"
+   "stp %1, %0,   [%2, #16 * 16]\n"
+   : "=" (tmp1), "=" (tmp2)
+   : "r" (newregs)
+   : "memory"
+   );
+   }
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d050d720a1b4..cea009f2657d 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -148,6 +148,8 @@ static inline void cpu_panic_kernel(void)
  */
 bool cpus_are_stuck_in_kernel(void);
 
+extern void smp_send_crash_stop(void);
+
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/kernel/machine_kexec.c 
b/arch/arm64/kernel/machine_kexec.c
index f7938fecf3ff..d56ea8c805a8 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,6 +9,9 @@
  * published by the Free Software Foundation.
  */
 
+#include 
+#include 
+#include 
 #include 
 #include 
 
@@ -161,7 +164,8 @@ void machine_kexec(struct kimage *kimage)
/*
 * New cpus may have become stuck_in_kernel after we loaded the image.
 */
-   BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
+   BUG_ON((cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)) &&
+   !WARN_ON(kimage == kexec_crash_image));
 
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 
@@ -200,15 +204,58 @@ void machine_kexec(struct kimage *kimage)
 * relocation is complete.
 */
 
-   cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
-   kimage->start, 

[PATCH v30 04/11] arm64: mm: allow for unmapping memory region from kernel mapping

2017-01-24 Thread AKASHI Takahiro
The current implementation of create_mapping_late() is only allowed
to modify permission attributes (read-only or read-write) against
the existing kernel mapping.

In this patch, PAGE_KERNEL_INVALID protection attribute is introduced.
We will now be able to invalidate (or unmap) some part of the existing
kernel mapping by specifying PAGE_KERNEL_INVALID to create_mapping_late().

This feature will be used in a succeeding kdump patch to protect
the memory reserved for crash dump kernel once it has been loaded.

Signed-off-by: AKASHI Takahiro 
---
 arch/arm64/include/asm/mmu.h   |  2 ++
 arch/arm64/include/asm/pgtable-hwdef.h |  2 ++
 arch/arm64/include/asm/pgtable-prot.h  |  1 +
 arch/arm64/include/asm/pgtable.h   |  4 
 arch/arm64/mm/mmu.c| 29 -
 5 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 47619411f0ff..a6c1367527bc 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -36,6 +36,8 @@ extern void init_mem_pgprot(void);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
   unsigned long virt, phys_addr_t size,
   pgprot_t prot, bool page_mappings_only);
+extern void create_mapping_late(phys_addr_t phys, unsigned long virt,
+   phys_addr_t size, pgprot_t prot);
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
 
 #endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd90de9..e66efec31ca9 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -119,6 +119,7 @@
 #define PUD_TABLE_BIT  (_AT(pgdval_t, 1) << 1)
 #define PUD_TYPE_MASK  (_AT(pgdval_t, 3) << 0)
 #define PUD_TYPE_SECT  (_AT(pgdval_t, 1) << 0)
+#define PUD_VALID  PUD_TYPE_SECT
 
 /*
  * Level 2 descriptor (PMD).
@@ -128,6 +129,7 @@
 #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT  (_AT(pmdval_t, 1) << 0)
 #define PMD_TABLE_BIT  (_AT(pmdval_t, 1) << 1)
+#define PMD_VALID  PMD_TYPE_SECT
 
 /*
  * Section
diff --git a/arch/arm64/include/asm/pgtable-prot.h 
b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..945d84cd5df7 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -54,6 +54,7 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_DEFAULT | PTE_UXN | 
PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC   __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | 
PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_CONT)
+#define PAGE_KERNEL_INVALID__pgprot(0)
 
 #define PAGE_HYP   __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
 #define PAGE_HYP_EXEC  __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ffbb9a520563..1904a7c07018 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -364,6 +364,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, 
unsigned long pfn,
 
 #define pmd_bad(pmd)   (!(pmd_val(pmd) & PMD_TABLE_BIT))
 
+#define pmd_valid(pmd) (!!(pmd_val(pmd) & PMD_VALID))
+
 #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == \
@@ -428,6 +430,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)   (!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_valid(pud) (!!(pud_val(pud) & PUD_VALID))
 #define pud_present(pud)   (pud_val(pud))
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -481,6 +484,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 #define pgd_none(pgd)  (!pgd_val(pgd))
 #define pgd_bad(pgd)   (!(pgd_val(pgd) & 2))
+#define pgd_valid(pgd) (!!(pgd_val(pgd) & 1))
 #define pgd_present(pgd)   (pgd_val(pgd))
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 17243e43184e..9c7adcce8e4e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -133,7 +133,8 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 * Set the contiguous bit for the subsequent group of PTEs if
 * its size and alignment are appropriate.
 */
-   if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) {
+   if ((pgprot_val(prot) & PTE_VALID) &&
+   (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0)) {
if (end - addr >= CONT_PTE_SIZE && !page_mappings_only)
 

[PATCH v30 03/11] arm64: kdump: reserve memory for crash dump kernel

2017-01-24 Thread AKASHI Takahiro
"crashkernel=" kernel parameter specifies the size (and optionally
the start address) of the system ram used by crash dump kernel.
reserve_crashkernel() will allocate and reserve the memory at the startup
of primary kernel.

This memory range will be exported to userspace via an entry named
"Crash kernel" in /proc/iomem.

Signed-off-by: AKASHI Takahiro 
Signed-off-by: Mark Salter 
Signed-off-by: Pratyush Anand 
Reviewed-by: James Morse 
Acked-by: Catalin Marinas 
---
 arch/arm64/kernel/setup.c |  7 +-
 arch/arm64/mm/init.c  | 62 +++
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b051367e2149..515e9c6696df 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -224,6 +223,12 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, _data);
+#ifdef CONFIG_KEXEC_CORE
+   /* Userspace will find "Crash kernel" region in /proc/iomem. */
+   if (crashk_res.end && crashk_res.start >= res->start &&
+   crashk_res.end <= res->end)
+   request_resource(res, _res);
+#endif
}
 }
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6cddb566eb21..2aba75dc7720 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,12 +30,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -76,6 +78,63 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+   unsigned long long crash_base, crash_size;
+   int ret;
+
+   ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+   _size, _base);
+   /* no crashkernel= or invalid value specified */
+   if (ret || !crash_size)
+   return;
+
+   crash_size = PAGE_ALIGN(crash_size);
+
+   if (crash_base == 0) {
+   /* Current arm64 boot protocol requires 2MB alignment */
+   crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+   crash_size, SZ_2M);
+   if (crash_base == 0) {
+   pr_warn("Unable to allocate crashkernel (size:%llx)\n",
+   crash_size);
+   return;
+   }
+   } else {
+   /* User specifies base address explicitly. */
+   if (!memblock_is_region_memory(crash_base, crash_size) ||
+   memblock_is_region_reserved(crash_base, crash_size)) {
+   pr_warn("crashkernel has wrong address or size\n");
+   return;
+   }
+
+   if (!IS_ALIGNED(crash_base, SZ_2M)) {
+   pr_warn("crashkernel base address is not 2MB 
aligned\n");
+   return;
+   }
+   }
+   memblock_reserve(crash_base, crash_size);
+
+   pr_info("Reserving %lldMB of memory at %lldMB for crashkernel\n",
+   crash_size >> 20, crash_base >> 20);
+
+   crashk_res.start = crash_base;
+   crashk_res.end = crash_base + crash_size - 1;
+}
+#else
+static void __init reserve_crashkernel(void)
+{
+}
+#endif /* CONFIG_KEXEC_CORE */
+
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -331,6 +390,9 @@ void __init arm64_memblock_init(void)
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
+
+   reserve_crashkernel();
+
dma_contiguous_reserve(arm64_dma_phys_limit);
 
memblock_allow_resize();
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 02/11] arm64: limit memory regions based on DT property, usable-memory-range

2017-01-24 Thread AKASHI Takahiro
Crash dump kernel utilizes only a subset of available memory as System RAM.
On arm64 kdump, this memory range is advertised to the crash dump kernel via
a device-tree property under /chosen,
   linux,usable-memory-range = <BASE SIZE>

Crash dump kernel reads this property at boot time and calls
memblock_cap_memory_range() to limit usable memory ranges which are
described as entries in UEFI memory map table or "memory" nodes in
a device tree blob.

Signed-off-by: AKASHI Takahiro 
Reviewed-by: Geoff Levand 
Acked-by: Catalin Marinas 
---
 arch/arm64/mm/init.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 380ebe705093..6cddb566eb21 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -187,10 +187,45 @@ static int __init early_mem(char *p)
 }
 early_param("mem", early_mem);
 
+static int __init early_init_dt_scan_usablemem(unsigned long node,
+   const char *uname, int depth, void *data)
+{
+   struct memblock_region *usablemem = (struct memblock_region *)data;
+   const __be32 *reg;
+   int len;
+
+   usablemem->size = 0;
+
+   if (depth != 1 || strcmp(uname, "chosen") != 0)
+   return 0;
+
+   reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", );
+   if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+   return 1;
+
+   usablemem->base = dt_mem_next_cell(dt_root_addr_cells, );
+   usablemem->size = dt_mem_next_cell(dt_root_size_cells, );
+
+   return 1;
+}
+
+static void __init fdt_enforce_memory_region(void)
+{
+   struct memblock_region reg;
+
+   of_scan_flat_dt(early_init_dt_scan_usablemem, );
+
+   if (reg.size)
+   memblock_cap_memory_range(reg.base, reg.size);
+}
+
 void __init arm64_memblock_init(void)
 {
const s64 linear_region_size = -(s64)PAGE_OFFSET;
 
+   /* Handle linux,usable-memory-range property */
+   fdt_enforce_memory_region();
+
/*
 * Ensure that the linear region takes up exactly half of the kernel
 * virtual address space. This way, we can distinguish a linear address
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 01/11] memblock: add memblock_cap_memory_range()

2017-01-24 Thread AKASHI Takahiro
Add memblock_cap_memory_range() which will remove all the memblock regions
except the memory range specified in the arguments. In addition, rework is
done on memblock_mem_limit_remove_map() to re-implement it using
memblock_cap_memory_range().

This function, like memblock_mem_limit_remove_map(), will not remove
memblocks with MEMMAP_NOMAP attribute as they may be mapped and accessed
later as "device memory."
See the commit a571d4eb55d8 ("mm/memblock.c: add new infrastructure to
address the mem limit issue").

This function is used, in a succeeding patch in the series of arm64 kdump
support, to limit the range of usable memory, or System RAM, on crash dump
kernel.
(Please note that "mem=" parameter is of little use for this purpose.)

Signed-off-by: AKASHI Takahiro 
Reviewed-by: Will Deacon 
Acked-by: Catalin Marinas 
Acked-by: Dennis Chen 
Cc: linux...@kvack.org
Cc: Andrew Morton 
---
 include/linux/memblock.h |  1 +
 mm/memblock.c| 44 +---
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5b759c9acf97..fbfcacc50c29 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -333,6 +333,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
 void memblock_mem_limit_remove_map(phys_addr_t limit);
 bool memblock_is_memory(phys_addr_t addr);
 int memblock_is_map_memory(phys_addr_t addr);
diff --git a/mm/memblock.c b/mm/memblock.c
index 7608bc305936..fea1688fef60 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1514,11 +1514,37 @@ void __init memblock_enforce_memory_limit(phys_addr_t 
limit)
  (phys_addr_t)ULLONG_MAX);
 }
 
+void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
+{
+   int start_rgn, end_rgn;
+   int i, ret;
+
+   if (!size)
+   return;
+
+   ret = memblock_isolate_range(, base, size,
+   _rgn, _rgn);
+   if (ret)
+   return;
+
+   /* remove all the MAP regions */
+   for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
+   if (!memblock_is_nomap([i]))
+   memblock_remove_region(, i);
+
+   for (i = start_rgn - 1; i >= 0; i--)
+   if (!memblock_is_nomap([i]))
+   memblock_remove_region(, i);
+
+   /* truncate the reserved regions */
+   memblock_remove_range(, 0, base);
+   memblock_remove_range(,
+   base + size, (phys_addr_t)ULLONG_MAX);
+}
+
 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 {
-   struct memblock_type *type = 
phys_addr_t max_addr;
-   int i, ret, start_rgn, end_rgn;
 
if (!limit)
return;
@@ -1529,19 +1555,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t 
limit)
if (max_addr == (phys_addr_t)ULLONG_MAX)
return;
 
-   ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
-   _rgn, _rgn);
-   if (ret)
-   return;
-
-   /* remove all the MAP regions above the limit */
-   for (i = end_rgn - 1; i >= start_rgn; i--) {
-   if (!memblock_is_nomap(>regions[i]))
-   memblock_remove_region(type, i);
-   }
-   /* truncate the reserved regions */
-   memblock_remove_range(, max_addr,
- (phys_addr_t)ULLONG_MAX);
+   memblock_cap_memory_range(0, max_addr);
 }
 
 static int __init_memblock memblock_search(struct memblock_type *type, 
phys_addr_t addr)
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v30 00/11] arm64: add kdump support

2017-01-24 Thread AKASHI Takahiro
This patch series adds kdump support on arm64.

To load a crash-dump kernel to the systems, a series of patches to
kexec-tools[1] are also needed. Please use the latest one, v5 [2].
For your convenience, you can pick them up from:
   https://git.linaro.org/people/takahiro.akashi/linux-aarch64.git arm64/kdump
   https://git.linaro.org/people/takahiro.akashi/kexec-tools.git arm64/kdump

To examine vmcore (/proc/vmcore) on a crash-dump kernel, you can use
  - crash utility (v7.1.8 or later, i.e. master for now) [3]

I tested this patchset on fast model and hikey.
The previous version, v29, was also:
Tested-by: Pratyush Anand  (mustang and seattle)

Changes for v30 (Jan 24, 2017)
  o rebased to Linux-v4.10-rc5
  o remove "linux,crashkernel-base/size" from exported device tree
  o protect memory region for crash-dump kernel (adding patch#4,5)
  o remove "in_crash_kexec" variable
  o and other trivial changes

Changes for v29 (Dec 28, 2016)
  o rebased to Linux-v4.10-rc1
  o change asm constraints in crash_setup_regs() per Catalin

Changes for v28 (Nov 22, 2016)
  o rebased to Linux-v4.9-rc6
  o revamp patch #1 and merge memblock_cap_memory_range() with
memblock_mem_limit_remove_map()

Changes for v27 (Nov 1, 2016)
  o rebased to Linux-v4.9-rc3
  o revert v26 change, i.e. revive "linux,usable-memory-range" property
(patch #2/#3, updating patch #9)
  o minor fixes per review comments (patch #3/#4/#6/#8)
  o re-order patches and improve commit messages for readability

Changes for v26 (Sep 7, 2016):
  o Use /reserved-memory instead of "linux,usable-memory-range" property
(dropping v25's patch#2 and #3, updating ex-patch#9.)

Changes for v25 (Aug 29, 2016):
  o Rebase to Linux-4.8-rc4
  o Use memremap() instead of ioremap_cache() [patch#5]

Changes for v24 (Aug 9, 2016):
  o Rebase to Linux-4.8-rc1
  o Update descriptions about newly added DT properties

Changes for v23 (July 26, 2016):

  o Move memblock_reserve() to a single place in reserve_crashkernel()
  o Use  cpu_park_loop() in ipi_cpu_crash_stop()
  o Always enforce ARCH_LOW_ADDRESS_LIMIT to the memory range of crash kernel
  o Re-implement fdt_enforce_memory_region() to remove non-reserve regions
(for ACPI) from usable memory at crash kernel

Changes for v22 (July 12, 2016):

  o Export "crashkernel-base" and "crashkernel-size" via device-tree,
and add some descriptions about them in chosen.txt
  o Rename "usable-memory" to "usable-memory-range" to avoid inconsistency
with powerpc's "usable-memory"
  o Make cosmetic changes regarding "ifdef" usage
  o Correct some wordings in kdump.txt

Changes for v21 (July 6, 2016):

  o Remove kexec patches.
  o Rebase to arm64's for-next/core (Linux-4.7-rc4 based).
  o Clarify the description about kvm in kdump.txt.

See the link [4] for older changes.


[1] https://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
[2] http://lists.infradead.org/pipermail/kexec/2017-January/018002.html
[3] https://github.com/crash-utility/crash.git
[4] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-June/438780.html

AKASHI Takahiro (10):
  memblock: add memblock_cap_memory_range()
  arm64: limit memory regions based on DT property, usable-memory-range
  arm64: kdump: reserve memory for crash dump kernel
  arm64: mm: allow for unmapping memory region from kernel mapping
  arm64: kdump: protect crash dump kernel memory
  arm64: kdump: implement machine_crash_shutdown()
  arm64: kdump: add VMCOREINFO's for user-space tools
  arm64: kdump: provide /proc/vmcore file
  arm64: kdump: enable kdump in defconfig
  Documentation: kdump: describe arm64 port

James Morse (1):
  Documentation: dt: chosen properties for arm64 kdump

 Documentation/devicetree/bindings/chosen.txt |  37 +++
 Documentation/kdump/kdump.txt|  16 ++-
 arch/arm64/Kconfig   |  11 ++
 arch/arm64/configs/defconfig |   1 +
 arch/arm64/include/asm/hardirq.h |   2 +-
 arch/arm64/include/asm/kexec.h   |  42 +++-
 arch/arm64/include/asm/mmu.h |   2 +
 arch/arm64/include/asm/pgtable-hwdef.h   |   2 +
 arch/arm64/include/asm/pgtable-prot.h|   1 +
 arch/arm64/include/asm/pgtable.h |   4 +
 arch/arm64/include/asm/smp.h |   2 +
 arch/arm64/kernel/Makefile   |   1 +
 arch/arm64/kernel/crash_dump.c   |  71 +
 arch/arm64/kernel/machine_kexec.c| 132 ++-
 arch/arm64/kernel/setup.c|   7 +-
 arch/arm64/kernel/smp.c  |  63 +++
 arch/arm64/mm/init.c | 150 +++
 arch/arm64/mm/mmu.c  |  63 +--
 include/linux/memblock.h |   1 +
 mm/memblock.c|  44 +---
 20 files changed, 596 insertions(+), 56 deletions(-)
 create mode 100644 

[PATCH v5 9/9] arm64: kdump: Add support for binary image files

2017-01-24 Thread AKASHI Takahiro
From: Pratyush Anand 

This patch adds support to use binary image ie arch/arm64/boot/Image with
kdump.

Signed-off-by: Pratyush Anand 
[takahiro.aka...@linaro.org: a bit reworked]
Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/kexec-image-arm64.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/kexec/arch/arm64/kexec-image-arm64.c 
b/kexec/arch/arm64/kexec-image-arm64.c
index 960ed96..982e431 100644
--- a/kexec/arch/arm64/kexec-image-arm64.c
+++ b/kexec/arch/arm64/kexec-image-arm64.c
@@ -4,7 +4,9 @@
 
 #define _GNU_SOURCE
 
+#include "crashdump-arm64.h"
 #include "kexec-arm64.h"
+#include "kexec-syscall.h"
 #include 
 
 int image_arm64_probe(const char *kernel_buf, off_t kernel_size)
@@ -58,6 +60,16 @@ int image_arm64_load(int argc, char **argv, const char 
*kernel_buf,
dbgprintf("%s: PE format:  %s\n", __func__,
(arm64_header_check_pe_sig(header) ? "yes" : "no"));
 
+   if (info->kexec_flags & KEXEC_ON_CRASH) {
+   /* create and initialize elf core header segment */
+   result = load_crashdump_segments(info);
+   if (result) {
+   dbgprintf("%s: Creating eflcorehdr failed.\n",
+   __func__);
+   goto exit;
+   }
+   }
+
/* load the kernel */
add_segment_phys_virt(info, kernel_buf, kernel_size,
kernel_segment + arm64_mem.text_offset,
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 8/9] arm64: kdump: add DT properties to crash dump kernel's dtb

2017-01-24 Thread AKASHI Takahiro
We pass the following properties to crash dump kernel:
linux,elfcorehdr: elf core header segment,
  same as "elfcorehdr=" kernel parameter on other archs
linux,usable-memory-range: usable memory reserved for crash dump kernel

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/kexec-arm64.c | 71 +++---
 kexec/arch/arm64/kexec-elf-arm64.c |  5 ---
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 5e30107..f62c4b3 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -128,9 +128,6 @@ int arch_process_options(int argc, char **argv)
case OPT_INITRD:
arm64_opts.initrd = optarg;
break;
-   case OPT_PANIC:
-   die("load-panic (-p) not supported");
-   break;
default:
break; /* Ignore core and unknown options. */
}
@@ -285,8 +282,12 @@ on_success:
  * setup_2nd_dtb - Setup the 2nd stage kernel's dtb.
  */
 
-static int setup_2nd_dtb(struct dtb *dtb, char *command_line)
+static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash)
 {
+   char *new_buf;
+   int new_size;
+   int nodeoffset;
+   uint64_t range[2];
int result;
 
result = fdt_check_header(dtb->buf);
@@ -298,8 +299,67 @@ static int setup_2nd_dtb(struct dtb *dtb, char 
*command_line)
 
result = set_bootargs(dtb, command_line);
 
+   if (on_crash) {
+   nodeoffset = fdt_path_offset(dtb->buf, "/chosen");
+   fdt_delprop(dtb->buf, nodeoffset, "linux,elfcorehdr");
+   fdt_delprop(dtb->buf, nodeoffset, "linux,usable-memory-range");
+   new_size = fdt_totalsize(dtb->buf)
+   + 2 * (sizeof(struct fdt_property)
+   + FDT_TAGALIGN(sizeof(range)))
+   + strlen("linux,elfcorehdr") + 1
+   + strlen("linux,usable-memory-range") + 1;
+
+   new_buf = xmalloc(new_size);
+   result = fdt_open_into(dtb->buf, new_buf, new_size);
+   if (result) {
+   dbgprintf("%s: fdt_open_into failed: %s\n", __func__,
+   fdt_strerror(result));
+   result = -ENOSPC;
+   goto on_error;
+   }
+
+   range[0] = cpu_to_be64(elfcorehdr_mem.start);
+   range[1] = cpu_to_be64(elfcorehdr_mem.end
+   - elfcorehdr_mem.start + 1);
+   nodeoffset = fdt_path_offset(new_buf, "/chosen");
+   result = fdt_setprop(new_buf, nodeoffset, "linux,elfcorehdr",
+   (void *)range, sizeof(range));
+   if (result) {
+   dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
+   fdt_strerror(result));
+   result = -EINVAL;
+   goto on_error;
+   }
+
+   range[0] = cpu_to_be64(crash_reserved_mem.start);
+   range[1] = cpu_to_be64(crash_reserved_mem.end
+   - crash_reserved_mem.start + 1);
+   nodeoffset = fdt_path_offset(new_buf, "/chosen");
+   result = fdt_setprop(new_buf, nodeoffset,
+   "linux,usable-memory-range",
+   (void *)range, sizeof(range));
+   if (result) {
+   dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
+   fdt_strerror(result));
+   result = -EINVAL;
+   goto on_error;
+   }
+
+   fdt_pack(new_buf);
+   dtb->buf = new_buf;
+   dtb->size = fdt_totalsize(new_buf);
+   }
+
dump_reservemap(dtb);
 
+
+   return result;
+
+on_error:
+   fprintf(stderr, "kexec: %s failed.\n", __func__);
+   if (new_buf)
+   free(new_buf);
+
return result;
 }
 
@@ -367,7 +427,8 @@ int arm64_load_other_segments(struct kexec_info *info,
}
}
 
-   result = setup_2nd_dtb(, command_line);
+   result = setup_2nd_dtb(, command_line,
+   info->kexec_flags & KEXEC_ON_CRASH);
 
if (result)
return -EFAILED;
diff --git a/kexec/arch/arm64/kexec-elf-arm64.c 
b/kexec/arch/arm64/kexec-elf-arm64.c
index 842ce21..b17a31a 100644
--- a/kexec/arch/arm64/kexec-elf-arm64.c
+++ b/kexec/arch/arm64/kexec-elf-arm64.c
@@ -47,11 +47,6 @@ int elf_arm64_load(int argc, char **argv, const char 
*kernel_buf,
int result;
int i;
 
-   if (info->kexec_flags & KEXEC_ON_CRASH) {
-   fprintf(stderr, "kexec: kdump not yet 

[PATCH v5 6/9] arm64: kdump: set up kernel image segment

2017-01-24 Thread AKASHI Takahiro
On arm64, we can use the same kernel image as 1st kernel, but
we have to modify the entry point as well as segments' addresses
in the kernel's elf header in order to load them into correct places.

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/crashdump-arm64.c | 23 +++
 kexec/arch/arm64/crashdump-arm64.h |  1 +
 kexec/arch/arm64/kexec-arm64.c | 25 -
 kexec/arch/arm64/kexec-elf-arm64.c | 10 +-
 4 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/kexec/arch/arm64/crashdump-arm64.c 
b/kexec/arch/arm64/crashdump-arm64.c
index 8346131..9517329 100644
--- a/kexec/arch/arm64/crashdump-arm64.c
+++ b/kexec/arch/arm64/crashdump-arm64.c
@@ -213,3 +213,26 @@ int load_crashdump_segments(struct kexec_info *info)
 
return 0;
 }
+
+/*
+ * e_entry and p_paddr are actually in virtual address space.
+ * Those values will be translated to physical addresses by
+ * using virt_to_phys().
+ * So let's get ready for later use so the memory base (phys_offset)
+ * will be correctly replaced with crash_reserved_mem.start.
+ */
+void modify_ehdr_for_crashdump(struct mem_ehdr *ehdr)
+{
+   struct mem_phdr *phdr;
+   int i;
+
+   ehdr->e_entry += - arm64_mem.phys_offset + crash_reserved_mem.start;
+
+   for (i = 0; i < ehdr->e_phnum; i++) {
+   phdr = >e_phdr[i];
+   if (phdr->p_type != PT_LOAD)
+   continue;
+   phdr->p_paddr +=
+   (-arm64_mem.phys_offset + crash_reserved_mem.start);
+   }
+}
diff --git a/kexec/arch/arm64/crashdump-arm64.h 
b/kexec/arch/arm64/crashdump-arm64.h
index da75a2d..382f571 100644
--- a/kexec/arch/arm64/crashdump-arm64.h
+++ b/kexec/arch/arm64/crashdump-arm64.h
@@ -21,5 +21,6 @@ extern struct memory_range crash_reserved_mem;
 extern struct memory_range elfcorehdr_mem;
 
 extern int load_crashdump_segments(struct kexec_info *info);
+extern void modify_ehdr_for_crashdump(struct mem_ehdr *ehdr);
 
 #endif /* CRASHDUMP_ARM64_H */
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index d02b9da..5a1da2e 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -307,12 +307,27 @@ unsigned long arm64_locate_kernel_segment(struct 
kexec_info *info)
 {
unsigned long hole;
 
-   hole = locate_hole(info,
-   arm64_mem.text_offset + arm64_mem.image_size,
-   MiB(2), 0, ULONG_MAX, 1);
+   if (info->kexec_flags & KEXEC_ON_CRASH) {
+   unsigned long hole_end;
+
+   hole = (crash_reserved_mem.start < mem_min ?
+   mem_min : crash_reserved_mem.start);
+   hole = _ALIGN_UP(hole, MiB(2));
+   hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size;
+
+   if ((hole_end > mem_max) ||
+   (hole_end > crash_reserved_mem.end)) {
+   dbgprintf("%s: Crash kernel out of range\n", __func__);
+   hole = ULONG_MAX;
+   }
+   } else {
+   hole = locate_hole(info,
+   arm64_mem.text_offset + arm64_mem.image_size,
+   MiB(2), 0, ULONG_MAX, 1);
 
-   if (hole == ULONG_MAX)
-   dbgprintf("%s: locate_hole failed\n", __func__);
+   if (hole == ULONG_MAX)
+   dbgprintf("%s: locate_hole failed\n", __func__);
+   }
 
return hole;
 }
diff --git a/kexec/arch/arm64/kexec-elf-arm64.c 
b/kexec/arch/arm64/kexec-elf-arm64.c
index c70a37a..842ce21 100644
--- a/kexec/arch/arm64/kexec-elf-arm64.c
+++ b/kexec/arch/arm64/kexec-elf-arm64.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 
+#include "crashdump-arm64.h"
 #include "kexec-arm64.h"
 #include "kexec-elf.h"
 #include "kexec-syscall.h"
@@ -105,7 +106,8 @@ int elf_arm64_load(int argc, char **argv, const char 
*kernel_buf,
}
 
arm64_mem.vp_offset = _ALIGN_DOWN(ehdr.e_entry, MiB(2));
-   arm64_mem.vp_offset -= kernel_segment - get_phys_offset();
+   if (!(info->kexec_flags & KEXEC_ON_CRASH))
+   arm64_mem.vp_offset -= kernel_segment - get_phys_offset();
 
dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment);
dbgprintf("%s: text_offset:%016lx\n", __func__,
@@ -127,6 +129,12 @@ int elf_arm64_load(int argc, char **argv, const char 
*kernel_buf,
__func__);
goto exit;
}
+
+   /*
+* offset addresses in order to fit vmlinux
+* (elf_exec) into crash kernel's memory
+*/
+   modify_ehdr_for_crashdump();
}
 
/* load the kernel */
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 7/9] arm64: kdump: set up other segments

2017-01-24 Thread AKASHI Takahiro
We make sure that all the other segments, initrd and device-tree blob,
are also loaded into the reserved memory of crash dump kernel.

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/kexec-arm64.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 5a1da2e..5e30107 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -375,7 +375,10 @@ int arm64_load_other_segments(struct kexec_info *info,
/* Put the other segments after the image. */
 
hole_min = image_base + arm64_mem.image_size;
-   hole_max = ULONG_MAX;
+   if (info->kexec_flags & KEXEC_ON_CRASH)
+   hole_max = crash_reserved_mem.end;
+   else
+   hole_max = ULONG_MAX;
 
if (arm64_opts.initrd) {
initrd_buf = slurp_file(arm64_opts.initrd, _size);
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 4/9] arm64: kdump: identify memory regions

2017-01-24 Thread AKASHI Takahiro
The following regions need to be identified for later use:
 a) memory regions which belong to the 1st kernel
 b) usable memory reserved for crash dump kernel

We go through /proc/iomem to find out a) and b) which are marked
as "System RAM" and "Crash kernel", respectively.

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/Makefile  |   2 +
 kexec/arch/arm64/crashdump-arm64.c | 100 -
 kexec/arch/arm64/crashdump-arm64.h |  14 +-
 kexec/arch/arm64/iomem.h   |   1 +
 4 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/kexec/arch/arm64/Makefile b/kexec/arch/arm64/Makefile
index 74b677f..2d4ae0e 100644
--- a/kexec/arch/arm64/Makefile
+++ b/kexec/arch/arm64/Makefile
@@ -6,6 +6,8 @@ arm64_FS2DT_INCLUDE += \
 
 arm64_DT_OPS += kexec/dt-ops.c
 
+arm64_MEM_REGIONS = kexec/mem_regions.c
+
 arm64_CPPFLAGS += -I $(srcdir)/kexec/
 
 arm64_KEXEC_SRCS += \
diff --git a/kexec/arch/arm64/crashdump-arm64.c 
b/kexec/arch/arm64/crashdump-arm64.c
index d2272c8..dcaca43 100644
--- a/kexec/arch/arm64/crashdump-arm64.c
+++ b/kexec/arch/arm64/crashdump-arm64.c
@@ -1,5 +1,13 @@
 /*
  * ARM64 crashdump.
+ * partly derived from arm implementation
+ *
+ * Copyright (c) 2014-2016 Linaro Limited
+ * Author: AKASHI Takahiro 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
  */
 
 #define _GNU_SOURCE
@@ -10,12 +18,102 @@
 #include "kexec.h"
 #include "crashdump.h"
 #include "crashdump-arm64.h"
+#include "iomem.h"
 #include "kexec-arm64.h"
 #include "kexec-elf.h"
+#include "mem_regions.h"
 
-struct memory_ranges usablemem_rgns = {};
+/* memory ranges on crashed kernel */
+static struct memory_range crash_memory_ranges[CRASH_MAX_MEMORY_RANGES];
+static struct memory_ranges crash_memory_rgns = {
+   .size = 0,
+   .max_size = CRASH_MAX_MEMORY_RANGES,
+   .ranges = crash_memory_ranges,
+};
+
+/* memory range reserved for crashkernel */
+struct memory_range crash_reserved_mem;
+struct memory_ranges usablemem_rgns = {
+   .size = 0,
+   .max_size = 1,
+   .ranges = _reserved_mem,
+};
+
+/*
+ * iomem_range_callback() - callback called for each iomem region
+ * @data: not used
+ * @nr: not used
+ * @str: name of the memory region
+ * @base: start address of the memory region
+ * @length: size of the memory region
+ *
+ * This function is called once for each memory region found in /proc/iomem.
+ * It locates system RAM and crashkernel reserved memory and places these to
+ * variables, respectively, crash_memory_ranges and crash_reserved_mem.
+ */
+
+static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr),
+   char *str, unsigned long long base,
+   unsigned long long length)
+{
+   if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0)
+   return mem_regions_add(_rgns,
+  base, length, RANGE_RAM);
+   else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0)
+   return mem_regions_add(_memory_rgns,
+  base, length, RANGE_RAM);
+
+   return 0;
+}
 
 int is_crashkernel_mem_reserved(void)
 {
+   if (!crash_reserved_mem.end)
+   kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL);
+
+   return crash_reserved_mem.start != crash_reserved_mem.end;
+}
+
+/*
+ * crash_get_memory_ranges() - read system physical memory
+ *
+ * Function reads through system physical memory and stores found memory
+ * regions in crash_memory_ranges.
+ * Regions are sorted in ascending order.
+ *
+ * Returns 0 in case of success and -1 otherwise (errno is set).
+ */
+static int crash_get_memory_ranges(void)
+{
+   /*
+* First read all memory regions that can be considered as
+* system memory including the crash area.
+*/
+   if (!usablemem_rgns.size)
+   kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL);
+
+   /* allow only a single region for crash dump kernel */
+   if (usablemem_rgns.size != 1) {
+   errno = EINVAL;
+   return -1;
+   }
+
+   dbgprint_mem_range("Reserved memory range", _reserved_mem, 1);
+
+   if (mem_regions_exclude(_memory_rgns, _reserved_mem)) {
+   fprintf(stderr,
+   "Error: Number of crash memory ranges excedeed the max 
limit\n");
+   errno = ENOMEM;
+   return -1;
+   }
+
+   /*
+* Make sure that the memory regions are sorted.
+*/
+   mem_regions_sort(_memory_rgns);
+
+   dbgprint_mem_range("Coredump memory ranges",
+  crash_memory_rgns.ranges, crash_memory_rgns.size);
+
return 0;
 }
diff --git a/kexec/arch/arm64/crashdump-arm64.h 

[PATCH v5 5/9] arm64: kdump: add elf core header segment

2017-01-24 Thread AKASHI Takahiro
Elf core header contains the information necessary for the coredump of
the 1st kernel, including its physical memory layout as well as cpu register
states at the panic.
The segment is allocated inside the reserved memory of crash dump kernel.

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/crashdump-arm64.c | 96 ++
 kexec/arch/arm64/crashdump-arm64.h |  3 ++
 kexec/arch/arm64/iomem.h   |  2 +
 kexec/arch/arm64/kexec-elf-arm64.c | 10 
 4 files changed, 111 insertions(+)

diff --git a/kexec/arch/arm64/crashdump-arm64.c 
b/kexec/arch/arm64/crashdump-arm64.c
index dcaca43..8346131 100644
--- a/kexec/arch/arm64/crashdump-arm64.c
+++ b/kexec/arch/arm64/crashdump-arm64.c
@@ -39,6 +39,39 @@ struct memory_ranges usablemem_rgns = {
.ranges = _reserved_mem,
 };
 
+struct memory_range elfcorehdr_mem;
+
+static struct crash_elf_info elf_info = {
+   .class  = ELFCLASS64,
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+   .data   = ELFDATA2LSB,
+#else
+   .data   = ELFDATA2MSB,
+#endif
+   .machine= EM_AARCH64,
+};
+
+/*
+ * Note: The returned value is correct only if !CONFIG_RANDOMIZE_BASE.
+ */
+static uint64_t get_kernel_page_offset(void)
+{
+   int i;
+
+   if (elf_info.kern_vaddr_start == UINT64_MAX)
+   return UINT64_MAX;
+
+   /* Current max virtual memory range is 48-bits. */
+   for (i = 48; i > 0; i--)
+   if (!(elf_info.kern_vaddr_start & (1UL << i)))
+   break;
+
+   if (i <= 0)
+   return UINT64_MAX;
+   else
+   return UINT64_MAX << i;
+}
+
 /*
  * iomem_range_callback() - callback called for each iomem region
  * @data: not used
@@ -62,6 +95,10 @@ static int iomem_range_callback(void *UNUSED(data), int 
UNUSED(nr),
else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0)
return mem_regions_add(_memory_rgns,
   base, length, RANGE_RAM);
+   else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0)
+   elf_info.kern_paddr_start = base;
+   else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0)
+   elf_info.kern_size = base + length - elf_info.kern_paddr_start;
 
return 0;
 }
@@ -115,5 +152,64 @@ static int crash_get_memory_ranges(void)
dbgprint_mem_range("Coredump memory ranges",
   crash_memory_rgns.ranges, crash_memory_rgns.size);
 
+   /*
+* For additional kernel code/data segment.
+* kern_paddr_start/kern_size are determined in iomem_range_callback
+*/
+   elf_info.kern_vaddr_start = get_kernel_sym("_text");
+   if (!elf_info.kern_vaddr_start)
+   elf_info.kern_vaddr_start = UINT64_MAX;
+
+   return 0;
+}
+
+/*
+ * load_crashdump_segments() - load the elf core header
+ * @info: kexec info structure
+ *
+ * This function creates and loads an additional segment of elf core header
+ * which is used to construct /proc/vmcore on crash dump kernel.
+ *
+ * Return 0 in case of success and -1 in case of error.
+ */
+
+int load_crashdump_segments(struct kexec_info *info)
+{
+   unsigned long elfcorehdr;
+   unsigned long bufsz;
+   void *buf;
+   int err;
+
+   /*
+* First fetch all the memory (RAM) ranges that we are going to
+* pass to the crash dump kernel during panic.
+*/
+
+   err = crash_get_memory_ranges();
+
+   if (err)
+   return err;
+
+   elf_info.page_offset = get_kernel_page_offset();
+   dbgprintf("%s: page_offset:   %016llx\n", __func__,
+   elf_info.page_offset);
+
+   err = crash_create_elf64_headers(info, _info,
+   crash_memory_rgns.ranges, crash_memory_rgns.size,
+   , , ELF_CORE_HEADER_ALIGN);
+
+   if (err)
+   return err;
+
+   elfcorehdr = add_buffer_phys_virt(info, buf, bufsz, bufsz, 0,
+   crash_reserved_mem.start, crash_reserved_mem.end,
+   -1, 0);
+
+   elfcorehdr_mem.start = elfcorehdr;
+   elfcorehdr_mem.end = elfcorehdr + bufsz - 1;
+
+   dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__,
+   elfcorehdr_mem.start, elfcorehdr_mem.end);
+
return 0;
 }
diff --git a/kexec/arch/arm64/crashdump-arm64.h 
b/kexec/arch/arm64/crashdump-arm64.h
index 07a0ed0..da75a2d 100644
--- a/kexec/arch/arm64/crashdump-arm64.h
+++ b/kexec/arch/arm64/crashdump-arm64.h
@@ -18,5 +18,8 @@
 
 extern struct memory_ranges usablemem_rgns;
 extern struct memory_range crash_reserved_mem;
+extern struct memory_range elfcorehdr_mem;
+
+extern int load_crashdump_segments(struct kexec_info *info);
 
 #endif /* CRASHDUMP_ARM64_H */
diff --git a/kexec/arch/arm64/iomem.h b/kexec/arch/arm64/iomem.h
index 20cda87..d4864bb 100644
--- a/kexec/arch/arm64/iomem.h
+++ b/kexec/arch/arm64/iomem.h

[PATCH v5 3/9] arm64: identify PHYS_OFFSET correctly

2017-01-24 Thread AKASHI Takahiro
Due to the kernel patch, commit e7cd190385d1 ("arm64: mark reserved
memblock regions explicitly in iomem"), the current code will not be able
to identify the correct value of PHYS_OFFSET if some "reserved" memory
region, which is likely to be UEFI runtime services code/data, exists at
an address below the first "System RAM" region.

This patch fixes this issue.

Signed-off-by: AKASHI Takahiro 
---
 kexec/arch/arm64/iomem.h   |  7 +++
 kexec/arch/arm64/kexec-arm64.c | 12 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 kexec/arch/arm64/iomem.h

diff --git a/kexec/arch/arm64/iomem.h b/kexec/arch/arm64/iomem.h
new file mode 100644
index 000..7fd66eb
--- /dev/null
+++ b/kexec/arch/arm64/iomem.h
@@ -0,0 +1,7 @@
+#ifndef IOMEM_H
+#define IOMEM_H
+
+#define SYSTEM_RAM "System RAM\n"
+#define IOMEM_RESERVED "reserved\n"
+
+#endif
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 04fd396..d02b9da 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -21,6 +21,7 @@
 #include "crashdump-arm64.h"
 #include "dt-ops.h"
 #include "fs2dt.h"
+#include "iomem.h"
 #include "kexec-syscall.h"
 #include "arch/options.h"
 
@@ -476,7 +477,14 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, 
char *str,
return -1;
 
r = (struct memory_range *)data + nr;
-   r->type = RANGE_RAM;
+
+   if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)))
+   r->type = RANGE_RAM;
+   else if (!strncmp(str, IOMEM_RESERVED, strlen(IOMEM_RESERVED)))
+   r->type = RANGE_RESERVED;
+   else
+   return 1;
+
r->start = base;
r->end = base + length - 1;
 
@@ -495,7 +503,7 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, 
char *str,
 static int get_memory_ranges_iomem(struct memory_range *array,
unsigned int *count)
 {
-   *count = kexec_iomem_for_each_line("System RAM\n",
+   *count = kexec_iomem_for_each_line(NULL,
get_memory_ranges_iomem_cb, array);
 
if (!*count) {
-- 
2.11.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 2/9] kexec: generalize and rename get_kernel_stext_sym()

2017-01-24 Thread AKASHI Takahiro
From: Pratyush Anand 

get_kernel_stext_sym() has been defined for both arm and i386. Other
architectures might need some other kernel symbol address. Therefore, rewrite
this function as a generic function to get any kernel symbol address.

Moreover, kallsyms is not an arch-specific representation, so have a
common function for all arches.

Signed-off-by: Pratyush Anand 
[created symbols.c]
Signed-off-by: AKASHI Takahiro 
---
 kexec/Makefile  |  1 +
 kexec/arch/arm/crashdump-arm.c  | 40 +---
 kexec/arch/i386/crashdump-x86.c | 29 -
 kexec/kexec.h   |  2 ++
 kexec/symbols.c | 41 +
 5 files changed, 45 insertions(+), 68 deletions(-)
 create mode 100644 kexec/symbols.c

diff --git a/kexec/Makefile b/kexec/Makefile
index 39f365f..2b4fb3d 100644
--- a/kexec/Makefile
+++ b/kexec/Makefile
@@ -26,6 +26,7 @@ KEXEC_SRCS_base += kexec/kernel_version.c
 KEXEC_SRCS_base += kexec/lzma.c
 KEXEC_SRCS_base += kexec/zlib.c
 KEXEC_SRCS_base += kexec/kexec-xen.c
+KEXEC_SRCS_base += kexec/symbols.c
 
 KEXEC_GENERATED_SRCS += $(PURGATORY_HEX_C)
 
diff --git a/kexec/arch/arm/crashdump-arm.c b/kexec/arch/arm/crashdump-arm.c
index 4a89b5e..245c21a 100644
--- a/kexec/arch/arm/crashdump-arm.c
+++ b/kexec/arch/arm/crashdump-arm.c
@@ -73,48 +73,10 @@ static struct crash_elf_info elf_info = {
 
 extern unsigned long long user_page_offset;
 
-/* Retrieve kernel _stext symbol virtual address from /proc/kallsyms */
-static unsigned long long get_kernel_stext_sym(void)
-{
-   const char *kallsyms = "/proc/kallsyms";
-   const char *stext = "_stext";
-   char sym[128];
-   char line[128];
-   FILE *fp;
-   unsigned long long vaddr = 0;
-   char type;
-
-   fp = fopen(kallsyms, "r");
-   if (!fp) {
-   fprintf(stderr, "Cannot open %s\n", kallsyms);
-   return 0;
-   }
-
-   while(fgets(line, sizeof(line), fp) != NULL) {
-   unsigned long long addr;
-
-   if (sscanf(line, "%Lx %c %s", , , sym) != 3)
-   continue;
-
-   if (strcmp(sym, stext) == 0) {
-   dbgprintf("kernel symbol %s vaddr = %#llx\n", stext, 
addr);
-   vaddr = addr;
-   break;
-   }
-   }
-
-   fclose(fp);
-
-   if (vaddr == 0)
-   fprintf(stderr, "Cannot get kernel %s symbol address\n", stext);
-
-   return vaddr;
-}
-
 static int get_kernel_page_offset(struct kexec_info *info,
struct crash_elf_info *elf_info)
 {
-   unsigned long long stext_sym_addr = get_kernel_stext_sym();
+   unsigned long long stext_sym_addr = get_kernel_sym("_stext");
if (stext_sym_addr == 0) {
if (user_page_offset != (-1ULL)) {
elf_info->page_offset = user_page_offset;
diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c
index 88aeee3..a324c6c 100644
--- a/kexec/arch/i386/crashdump-x86.c
+++ b/kexec/arch/i386/crashdump-x86.c
@@ -102,35 +102,6 @@ static int get_kernel_paddr(struct kexec_info 
*UNUSED(info),
return -1;
 }
 
-/* Retrieve kernel symbol virtual address from /proc/kallsyms */
-static unsigned long long get_kernel_sym(const char *symbol)
-{
-   const char *kallsyms = "/proc/kallsyms";
-   char sym[128];
-   char line[128];
-   FILE *fp;
-   unsigned long long vaddr;
-   char type;
-
-   fp = fopen(kallsyms, "r");
-   if (!fp) {
-   fprintf(stderr, "Cannot open %s\n", kallsyms);
-   return 0;
-   }
-
-   while(fgets(line, sizeof(line), fp) != NULL) {
-   if (sscanf(line, "%Lx %c %s", , , sym) != 3)
-   continue;
-   if (strcmp(sym, symbol) == 0) {
-   dbgprintf("kernel symbol %s vaddr = %16llx\n", symbol, 
vaddr);
-   return vaddr;
-   }
-   }
-
-   fprintf(stderr, "Cannot get kernel %s symbol address\n", symbol);
-   return 0;
-}
-
 /* Retrieve info regarding virtual address kernel has been compiled for and
  * size of the kernel from /proc/kcore. Current /proc/kcore parsing from
  * from kexec-tools fails because of malformed elf notes. A kernel patch has
diff --git a/kexec/kexec.h b/kexec/kexec.h
index 9194f1c..b4fafad 100644
--- a/kexec/kexec.h
+++ b/kexec/kexec.h
@@ -312,4 +312,6 @@ int xen_kexec_load(struct kexec_info *info);
 int xen_kexec_unload(uint64_t kexec_flags);
 void xen_kexec_exec(void);
 
+extern unsigned long long get_kernel_sym(const char *text);
+
 #endif /* KEXEC_H */
diff --git a/kexec/symbols.c b/kexec/symbols.c
new file mode 100644
index 000..ea6e327
--- /dev/null
+++ b/kexec/symbols.c
@@ -0,0 +1,41 @@
+#include 
+#include 
+#include "kexec.h"
+
+/* Retrieve