[PATCH v2] fadump: fix endianness issues in firmware-assisted dump handling

2014-10-01 Thread Hari Bathini
Firmware-assisted dump (fadump) kernel code is not little-endian (LE) compliant.
This patch fixes those endianness issues. The patch was tested with the upstream
kernel, and the generated LE fadump vmcore passed sanity testing. The output
below shows the crash tool successfully opening an LE fadump vmcore.

# crash vmlinux vmcore

crash 7.0.5
Copyright (C) 2002-2014  Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010  IBM Corporation
Copyright (C) 1999-2006  Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012  Fujitsu Limited
Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011  NEC Corporation
Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions.  Enter "help copying" to see the conditions.
This program has absolutely no warranty.  Enter "help warranty" for details.

crash: vmlinux: no .gnu_debuglink section
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "powerpc64le-unknown-linux-gnu"...

  KERNEL: vmlinux
DUMPFILE: vmcore
CPUS: 16
DATE: Wed Dec 31 19:00:00 1969
  UPTIME: 00:03:28
LOAD AVERAGE: 0.46, 0.86, 0.41
   TASKS: 268
NODENAME: linux-dhr2
 RELEASE: 3.17.0-rc5-7-default
 VERSION: #6 SMP Tue Sep 30 01:06:34 EDT 2014
 MACHINE: ppc64le  (4116 Mhz)
  MEMORY: 40 GB
   PANIC: "Oops: Kernel access of bad area, sig: 11 [#1]" (check log for details)
 PID: 6223
 COMMAND: bash
TASK: c009661b2500  [THREAD_INFO: c00967ac]
 CPU: 2
   STATE: TASK_RUNNING (PANIC)

crash>

Changes in v2:
1. Addressed casting-related warnings.
2. Elaborated on why exceptions should not be changed to big endian during fadump boot.
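
For reference, here is a minimal sketch (not part of the patch) of the access
pattern the patch applies throughout the fadump code: the dump registration and
CPU-state structures are filled in by firmware in big-endian byte order, so a
little-endian kernel must declare the fields as __be types and wrap every access
in the byteorder helpers. The structure and helper names below are illustrative
only.

#include <linux/types.h>
#include <asm/byteorder.h>	/* be64_to_cpu(), cpu_to_be64() */

/* Illustrative firmware-provided dump section; fields are always big endian. */
struct fw_dump_section_example {
	__be64	source_address;
	__be64	source_len;
	__be64	destination_address;
};

/* Reading a firmware field on a little-endian kernel. */
static inline u64 fw_section_len(const struct fw_dump_section_example *sec)
{
	return be64_to_cpu(sec->source_len);
}

/* Writing a value back in the byte order firmware expects. */
static inline void fw_section_set_dest(struct fw_dump_section_example *sec,
				       u64 addr)
{
	sec->destination_address = cpu_to_be64(addr);
}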

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/fadump.h |   52 ---
 arch/powerpc/kernel/fadump.c  |  114 +
 arch/powerpc/platforms/pseries/lpar.c |   15 
 3 files changed, 96 insertions(+), 85 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index a677456..493e72f 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -70,39 +70,39 @@
 #define CPU_UNKNOWN(~((u32)0))
 
 /* Utility macros */
-#define SKIP_TO_NEXT_CPU(reg_entry)\
-({ \
-   while (reg_entry->reg_id != REG_ID("CPUEND"))   \
-   reg_entry++;\
-   reg_entry++;\
+#define SKIP_TO_NEXT_CPU(reg_entry)\
+({ \
+   while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND"))  \
+   reg_entry++;\
+   reg_entry++;\
 })
 
 /* Kernel Dump section info */
 struct fadump_section {
-   u32 request_flag;
-   u16 source_data_type;
-   u16 error_flags;
-   u64 source_address;
-   u64 source_len;
-   u64 bytes_dumped;
-   u64 destination_address;
+   __be32  request_flag;
+   __be16  source_data_type;
+   __be16  error_flags;
+   __be64  source_address;
+   __be64  source_len;
+   __be64  bytes_dumped;
+   __be64  destination_address;
 };
 
 /* ibm,configure-kernel-dump header. */
 struct fadump_section_header {
-   u32 dump_format_version;
-   u16 dump_num_sections;
-   u16 dump_status_flag;
-   u32 offset_first_dump_section;
+   __be32  dump_format_version;
+   __be16  dump_num_sections;
+   __be16  dump_status_flag;
+   __be32  offset_first_dump_section;
 
/* Fields for disk dump option. */
-   u32 dd_block_size;
-   u64 dd_block_offset;
-   u64 dd_num_blocks;
-   u32 dd_offset_disk_path;
+   __be32  dd_block_size;
+   __be64  dd_block_offset;
+   __be64  dd_num_blocks;
+   __be32  dd_offset_disk_path;
 
/* Maximum time allowed to prevent an automatic dump-reboot. */
-   u32

[PATCH] PPC64: Adding symbols in vmcoreinfo to facilitate dump filtering

2013-11-15 Thread Hari Bathini
When the CONFIG_SPARSEMEM_VMEMMAP option is used in the kernel, makedumpfile
fails to filter the vmcore dump, as it fails to do vmemmap translations. So far,
dump filtering on ppc64 never had to deal with vmemmap addresses separately,
as vmemmap regions were mapped in zone normal. But with the inclusion of the
CONFIG_SPARSEMEM_VMEMMAP config option in the kernel, this vmemmap address
translation support becomes necessary for dump filtering. For vmemmap address
translation, a few kernel symbols are needed by the dump filtering tool. This
patch adds those symbols to vmcoreinfo, which a dump filtering tool can use for
filtering the kernel dump. These changes were tested successfully with a
makedumpfile tool that supports vmemmap-to-physical address translation outside
zone normal.
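
For illustration only (this is not part of the patch, and the real makedumpfile
implementation differs), a dump filtering tool could use the exported symbols
roughly as follows to translate a vmemmap address into a physical address. The
sketch glosses over the fact that the tool has to read each vmemmap_backing
entry out of the dump image using the vmcoreinfo offsets rather than follow
kernel pointers directly, and it assumes the backing block size is derived from
mmu_psize_defs[mmu_vmemmap_psize].shift.

/* Mirrors the kernel's vmemmap_backing layout exported via vmcoreinfo. */
struct vmemmap_backing {
	struct vmemmap_backing *list;	/* next entry (kernel virtual address)   */
	unsigned long phys;		/* physical address of the backing block */
	unsigned long virt_addr;	/* vmemmap virtual address it covers     */
};

/* Walk vmemmap_list to find the backing block for a vmemmap address. */
static unsigned long vmemmap_to_phys(unsigned long addr,
				     const struct vmemmap_backing *vmemmap_list,
				     unsigned int shift)
{
	unsigned long block_size = 1UL << shift;
	const struct vmemmap_backing *vb;

	for (vb = vmemmap_list; vb != NULL; vb = vb->list)
		if (addr >= vb->virt_addr && addr < vb->virt_addr + block_size)
			return vb->phys + (addr - vb->virt_addr);

	return 0;	/* address not backed by any vmemmap block */
}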

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pgalloc-64.h |4 
 arch/powerpc/kernel/machine_kexec.c   |   12 
 2 files changed, 16 insertions(+)

diff --git a/arch/powerpc/include/asm/pgalloc-64.h 
b/arch/powerpc/include/asm/pgalloc-64.h
index f65e27b..33e507a 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -17,6 +17,10 @@ struct vmemmap_backing {
unsigned long virt_addr;
 };
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+extern struct vmemmap_backing *vmemmap_list;
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
 /*
  * Functions that deal with pagetables that could be at any level of
  * the table need to be passed an index_size so they know how to
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index e1ec57e..88a7fb4 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
 #include <linux/ftrace.h>
 
 #include <asm/machdep.h>
+#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/sections.h>
 
@@ -75,6 +76,17 @@ void arch_crash_save_vmcoreinfo(void)
 #ifndef CONFIG_NEED_MULTIPLE_NODES
VMCOREINFO_SYMBOL(contig_page_data);
 #endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+   VMCOREINFO_SYMBOL(vmemmap_list);
+   VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+   VMCOREINFO_SYMBOL(mmu_psize_defs);
+   VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+   VMCOREINFO_OFFSET(vmemmap_backing, list);
+   VMCOREINFO_OFFSET(vmemmap_backing, phys);
+   VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+   VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+   VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
 }
 
 /*



[PATCH v2] PPC64: Adding symbols in vmcoreinfo to facilitate dump filtering

2013-11-24 Thread Hari Bathini
When the CONFIG_SPARSEMEM_VMEMMAP option is set in the kernel, the makedumpfile
tool fails to filter the vmcore dump, as it fails to do translations for
vmemmap addresses that are mapped outside zone normal. For vmemmap address
translation support in this scenario, a few kernel symbols are needed by the
dump filtering tool. This patch adds those symbols to vmcoreinfo, which a dump
filtering tool can use for filtering the kernel dump. These changes were tested
successfully with a makedumpfile tool that supports vmemmap-to-physical address
translation outside zone normal.

Changes from v1:
Updated patch description and removed #ifdef around extern.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pgalloc-64.h |2 ++
 arch/powerpc/kernel/machine_kexec.c   |   12 
 2 files changed, 14 insertions(+)

diff --git a/arch/powerpc/include/asm/pgalloc-64.h 
b/arch/powerpc/include/asm/pgalloc-64.h
index f65e27b..3973e62 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -17,6 +17,8 @@ struct vmemmap_backing {
unsigned long virt_addr;
 };
 
+extern struct vmemmap_backing *vmemmap_list;
+
 /*
  * Functions that deal with pagetables that could be at any level of
  * the table need to be passed an index_size so they know how to
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index e1ec57e..88a7fb4 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
 #include <linux/ftrace.h>
 
 #include <asm/machdep.h>
+#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/sections.h>
 
@@ -75,6 +76,17 @@ void arch_crash_save_vmcoreinfo(void)
 #ifndef CONFIG_NEED_MULTIPLE_NODES
VMCOREINFO_SYMBOL(contig_page_data);
 #endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+   VMCOREINFO_SYMBOL(vmemmap_list);
+   VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+   VMCOREINFO_SYMBOL(mmu_psize_defs);
+   VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+   VMCOREINFO_OFFSET(vmemmap_backing, list);
+   VMCOREINFO_OFFSET(vmemmap_backing, phys);
+   VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+   VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+   VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
 }
 
 /*



[PATCH 0/2] powerpc/pstore: Add pstore support for nvram partitions

2014-12-03 Thread Hari Bathini
This patch series adds pstore support on powernv platform to
read different nvram partitions and write compressed data to
oops-log nvram partition. As pseries platform already has
pstore support, this series moves most of the common code
for pseries and powernv platforms to a common file. Tested
the patches successfully on both pseries and powernv
platforms.
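
For readers unfamiliar with the capture mechanism this series builds on: oops
data reaches nvram through the kernel's kmsg_dump interface. Below is a minimal,
hypothetical sketch of that pattern; the buffer size and the
write_oops_to_nvram() stub are illustrative stand-ins, while the real code in
this series compresses the buffer and writes it out through
nvram_write_os_partition().

#include <linux/init.h>
#include <linux/kmsg_dump.h>

static char oops_buf[4000];	/* mirrors the lnx,oops-log req_size used here */

/* Hypothetical stand-in for the real nvram write path. */
static void write_oops_to_nvram(const char *buf, size_t len)
{
	/* e.g. compress, then write via nvram_write_os_partition() */
}

/* Called by the kernel on oops/panic with the tail of the kernel log. */
static void example_oops_dump(struct kmsg_dumper *dumper,
			      enum kmsg_dump_reason reason)
{
	size_t len = 0;

	if (kmsg_dump_get_buffer(dumper, false, oops_buf, sizeof(oops_buf), &len))
		write_oops_to_nvram(oops_buf, len);
}

static struct kmsg_dumper example_dumper = {
	.dump	= example_oops_dump,
};

/* Registration would typically happen from a platform initcall. */
static int __init example_oops_capture_init(void)
{
	return kmsg_dump_register(&example_dumper);
}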

---

Hari Bathini (2):
  pstore: Add pstore type id for firmware partition
  pstore: add pstore support on powernv


 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |2 
 arch/powerpc/kernel/nvram_64.c  |  679 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  663 --
 fs/pstore/inode.c   |3 
 include/linux/pstore.h  |1 
 7 files changed, 749 insertions(+), 659 deletions(-)

--
- Hari


[PATCH 1/2] pstore: Add pstore type id for firmware partition

2014-12-03 Thread Hari Bathini
This patch adds a pstore type id to be used for opal specific
nvram partitions.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 fs/pstore/inode.c  |3 +++
 include/linux/pstore.h |1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index fafb7a0..e83bb93 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -337,6 +337,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
sprintf(name, "powerpc-common-%s-%lld", psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+   sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
 };
 


[PATCH 2/2] pstore: add pstore support on powernv

2014-12-03 Thread Hari Bathini
This patch extends pstore, a generic interface to platform-dependent
persistent storage, to the powernv platform, to capture certain useful
information during a system's dying moments. Such support is already in
place for the pseries platform. While adding pstore support for powernv,
this patch also moves the code common to pseries and powernv to
arch/powerpc/kernel/nvram_64.c.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |2 
 arch/powerpc/kernel/nvram_64.c  |  679 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  663 --
 5 files changed, 745 insertions(+), 659 deletions(-)

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe..09a518b 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+   __be32 error_type;
+   __be32 seq_num;
+};
+
+struct nvram_os_partition {
+   const char *name;
+   int req_size;   /* desired size, in bytes */
+   int min_size;   /* minimum acceptable size (0 means req_size) */
+   long size;  /* size of data portion (excluding err_log_info) */
+   long index; /* offset of data portion of partition */
+   bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+   __be16 version;
+   __be16 report_length;
+   __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 unsigned int err_type, unsigned int 
err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void  pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern voidnvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+   int length, unsigned int *err_type,
+   unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+   char *buff, int length,
+   unsigned int err_type,
+   unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..a033fe9 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,6 +343,8 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
 #endif
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b..8c439a3 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,682 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+   .name = "ibm,rtas-log",
+   .req_size = 2079,
+   .min_size = 1055,
+   .index = -1,
+   .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+   .name = "lnx,oops-log",
+   .req_size = 4000,
+   .min_size = 2000,
+   .index = -1,
+   .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+   "ibm,rtas-log",
+#endif
+   "lnx,oops-log",
+   NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper

Re: [2/2] pstore: add pstore support on powernv

2014-12-04 Thread Hari Bathini

On 12/04/2014 11:07 AM, Michael Ellerman wrote:

On Wed, 2014-03-12 at 11:03:15 UTC, Hari Bathini wrote:

This patch extends pstore, a generic interface to platform dependent
persistent storage, support for powernv  platform to capture certain
useful information, during dying moments. Such support is already in
place for  pseries platform.  This patch while adding pstore support
for  powernv platform,  moves common code for pseries and powernv to
arch/powerpc/kernel/nvram_64.c file.

Please move the common code first in a separate patch. Unless there's some
reason you absolutely can't do that.


Sure, Michael. Let me make the changes as suggested and
post the updated patch series.

Thanks
Hari


cheers

[PATCH] powerpc/kdump: skip enabling big endian exception during crash

2014-12-11 Thread Hari Bathini
In the LE kernel, we currently have a hack for kexec that resets the exception
endian before starting a new kernel, as the kernel that is loaded could be a
big endian or a little endian kernel. In the kdump case, resetting the exception
endian fails when one or more CPUs are disabled. But in the kdump case we can
conveniently skip resetting the endianness, as the crashkernel is always of the
same endianness as the primary kernel.
This patch adds a new inline function to say whether this is the kdump path.
This function is used at the places where such a check is needed.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/kexec.h   |   10 ++
 arch/powerpc/kernel/machine_kexec_64.c |2 +-
 arch/powerpc/platforms/pseries/lpar.c  |7 ++-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 19c36cb..0d96d4d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -86,6 +86,11 @@ extern int overlaps_crashkernel(unsigned long start, 
unsigned long size);
 extern void reserve_crashkernel(void);
 extern void machine_kexec_mask_interrupts(void);
 
+static inline int is_kdump_path(void)
+{
+   return (crashing_cpu >= 0) ? 1 : 0;
+}
+
 #else /* !CONFIG_KEXEC */
 static inline void crash_kexec_secondary(struct pt_regs *regs) { }
 
@@ -106,6 +111,11 @@ static inline int 
crash_shutdown_unregister(crash_shutdown_t handler)
return 0;
 }
 
+static inline int is_kdump_path(void)
+{
+   return 0;
+}
+
 #endif /* CONFIG_KEXEC */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/machine_kexec_64.c 
b/arch/powerpc/kernel/machine_kexec_64.c
index 879b3aa..b4fe804 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -330,7 +330,7 @@ void default_machine_kexec(struct kimage *image)
 * using debugger IPI.
 */
 
-   if (crashing_cpu == -1)
+   if (!is_kdump_path())
kexec_prepare_cpus();
 
pr_debug("kexec: Starting switchover sequence.\n");
diff --git a/arch/powerpc/platforms/pseries/lpar.c 
b/arch/powerpc/platforms/pseries/lpar.c
index f6880d2..be41680 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -43,6 +43,7 @@
 #include <asm/trace.h>
 #include <asm/firmware.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
 #include <asm/fadump.h>
 
 #include "pseries.h"
@@ -257,8 +258,12 @@ static void pSeries_lpar_hptab_clear(void)
 *
 * This is also called on boot when a fadump happens. In that case we
 * must not change the exception endian mode.
+*
+* This is also called during kdump, which doesn't need the reset, as
+* the crashkernel is of the same endianness as the primary kernel.
 */
-   if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) {
+   if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active() &&
+   !is_kdump_path()) {
long rc;
 
rc = pseries_big_endian_exceptions();


[PATCH v2 0/3] powerpc/pstore: Add pstore support for nvram partitions

2014-12-16 Thread Hari Bathini
This patch series adds pstore support on powernv platform to
read different nvram partitions and write compressed data to
oops-log nvram partition. As pseries platform already has
pstore support, this series moves most of the common code
for pseries and powernv platforms to a common file. Tested
the patches successfully on both pseries and powernv
platforms.

---

Hari Bathini (3):
  powerpc/nvram: move generic code for nvram and pstore
  pstore: Add pstore type id for firmware partition
  pstore: add pstore support on powernv


 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |2 
 arch/powerpc/kernel/nvram_64.c  |  681 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  665 --
 fs/pstore/inode.c   |3 
 include/linux/pstore.h  |1 
 7 files changed, 751 insertions(+), 661 deletions(-)

--
- Hari


[PATCH v2 1/3] powerpc/nvram: move generic code for nvram and pstore

2014-12-16 Thread Hari Bathini
With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/nvram.h   |   50 ++
 arch/powerpc/include/asm/rtas.h|2 
 arch/powerpc/kernel/nvram_64.c |  660 
 arch/powerpc/platforms/pseries/nvram.c |  665 
 4 files changed, 716 insertions(+), 661 deletions(-)

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe..09a518b 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+   __be32 error_type;
+   __be32 seq_num;
+};
+
+struct nvram_os_partition {
+   const char *name;
+   int req_size;   /* desired size, in bytes */
+   int min_size;   /* minimum acceptable size (0 means req_size) */
+   long size;  /* size of data portion (excluding err_log_info) */
+   long index; /* offset of data portion of partition */
+   bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+   __be16 version;
+   __be16 report_length;
+   __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 unsigned int err_type, unsigned int 
err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void  pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern voidnvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+   int length, unsigned int *err_type,
+   unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+   char *buff, int length,
+   unsigned int err_type,
+   unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..a033fe9 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,6 +343,8 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
 #endif
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b..dbff7f0 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,663 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+   .name = "ibm,rtas-log",
+   .req_size = 2079,
+   .min_size = 1055,
+   .index = -1,
+   .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+   .name = "lnx,oops-log",
+   .req_size = 4000,
+   .min_size = 2000,
+   .index = -1,
+   .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+   "ibm,rtas-log",
+#endif
+   "lnx,oops-log",
+   NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+   .dump = oops_to_nvram
+};
+
+/*
+ * For capturing and compressing an oops or panic report...
+
+ * big_oops_buf

[PATCH v2 2/3] pstore: Add pstore type id for firmware partition

2014-12-16 Thread Hari Bathini
This patch adds a pstore type id to be used for opal specific
nvram partitions.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 fs/pstore/inode.c  |3 +++
 include/linux/pstore.h |1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 5041660..8e0c009 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -359,6 +359,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
sprintf(name, "powerpc-common-%s-%lld", psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+   sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
 };
 


[PATCH v2 3/3] pstore: add pstore support on powernv

2014-12-16 Thread Hari Bathini
This patch extends pstore, a generic interface to platform-dependent
persistent storage, to the powernv platform, to capture certain useful
information during a system's dying moments. Such support is already in
place for the pseries platform. This patch re-uses most of that code.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/nvram_64.c  |   25 +++--
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index dbff7f0..3afbc91 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -127,6 +127,14 @@ static size_t oops_data_sz;
 static struct z_stream_s stream;
 
 #ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+   .name = "ibm,skiboot",
+   .index = -1,
+   .os_partition = false
+};
+#endif
+
 #ifdef CONFIG_PPC_PSERIES
 static struct nvram_os_partition of_config_partition = {
.name = "of-config",
@@ -479,6 +487,16 @@ static ssize_t nvram_pstore_read(u64 *id, enum 
pstore_type_id *type,
time->tv_nsec = 0;
break;
 #endif
+#ifdef CONFIG_PPC_POWERNV
+   case PSTORE_TYPE_PPC_OPAL:
+   sig = NVRAM_SIG_FW;
+   part = &skiboot_partition;
+   *type = PSTORE_TYPE_PPC_OPAL;
+   *id = PSTORE_TYPE_PPC_OPAL;
+   time->tv_sec = 0;
+   time->tv_nsec = 0;
+   break;
+#endif
default:
return 0;
}
@@ -554,8 +572,11 @@ static int nvram_pstore_init(void)
 {
int rc = 0;
 
-   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
-   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   if (machine_is(pseries)) {
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   } else
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
 
nvram_pstore_info.buf = oops_data;
nvram_pstore_info.bufsize = oops_data_sz;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c 
b/arch/powerpc/platforms/powernv/opal-nvram.c
index f9896fd..9db4398 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 
 #include <asm/opal.h>
+#include <asm/nvram.h>
 #include <asm/machdep.h>
 
 static unsigned int nvram_size;
@@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, 
loff_t *index)
return count;
 }
 
+static int __init opal_nvram_init_log_partitions(void)
+{
+   /* Scan nvram for partitions */
+   nvram_scan_partitions();
+   nvram_init_oops_partition(0);
+   return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
 void __init opal_nvram_init(void)
 {
struct device_node *np;


Re: [PATCH v2 1/3] powerpc/nvram: move generic code for nvram and pstore

2014-12-17 Thread Hari Bathini

On 12/17/2014 05:33 AM, Michael Ellerman wrote:

On Tue, 2014-12-16 at 23:35 +0530, Hari Bathini wrote:

With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Sharing the code is great.

But, you need to keep in mind that it is very common for us to build kernels
with both POWERNV=y and PSERIES=y.

So you need to make sure you're only using CONFIG_PPC_PSERIES to protect things
that are optional on pseries. Not things that we *shouldn't* be doing on
powernv.

For example the logic in nvram_init_oops_partition() looks like it might do the
wrong thing for PSERIES=y POWERNV=y.


True. It might do the wrong thing when an incorrect value is passed by the
caller. But since the caller is platform-specific code [the
pseries_nvram_init_log_partitions() or opal_nvram_init_log_partitions()
routine] and passes the appropriate parameter, I haven't seen any issues
while testing.



diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..a033fe9 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,6 +343,8 @@ extern int early_init_dt_scan_rtas(unsigned long node,
  extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
  
  #ifdef CONFIG_PPC_PSERIES

+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);

You should add an empty version of this for !PSERIES, so you don't have to
ifdef all the call sites.


Sure. Will update accordingly..

Thanks
Hari


cheers





[PATCH v2] powerpc/kdump: Ignore failure in enabling big endian exception during crash

2014-12-18 Thread Hari Bathini
In the LE kernel, we currently have a hack for kexec that resets the exception
endian before starting a new kernel, as the kernel that is loaded could be a
big endian or a little endian kernel. In the kdump case, resetting the exception
endian fails when one or more CPUs are disabled. But we can ignore the failure
and still go ahead, as in most cases the crashkernel will be of the same
endianness as the primary kernel, and resetting the endianness is not even
needed in those cases.
This patch adds a new inline function to say whether this is the kdump path.
This function is used at the places where such a check is needed.

Changes from v1:
Instead of skipping, ignore failure in enabling big endian exception
during crash

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/kexec.h   |   10 ++
 arch/powerpc/kernel/machine_kexec_64.c |2 +-
 arch/powerpc/platforms/pseries/lpar.c  |   10 +-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 19c36cb..0d96d4d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -86,6 +86,11 @@ extern int overlaps_crashkernel(unsigned long start, 
unsigned long size);
 extern void reserve_crashkernel(void);
 extern void machine_kexec_mask_interrupts(void);
 
+static inline int is_kdump_path(void)
+{
+   return (crashing_cpu >= 0) ? 1 : 0;
+}
+
 #else /* !CONFIG_KEXEC */
 static inline void crash_kexec_secondary(struct pt_regs *regs) { }
 
@@ -106,6 +111,11 @@ static inline int 
crash_shutdown_unregister(crash_shutdown_t handler)
return 0;
 }
 
+static inline int is_kdump_path(void)
+{
+   return 0;
+}
+
 #endif /* CONFIG_KEXEC */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/machine_kexec_64.c 
b/arch/powerpc/kernel/machine_kexec_64.c
index 879b3aa..b4fe804 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -330,7 +330,7 @@ void default_machine_kexec(struct kimage *image)
 * using debugger IPI.
 */
 
-   if (crashing_cpu == -1)
+   if (!is_kdump_path())
kexec_prepare_cpus();
 
pr_debug("kexec: Starting switchover sequence.\n");
diff --git a/arch/powerpc/platforms/pseries/lpar.c 
b/arch/powerpc/platforms/pseries/lpar.c
index 469751d..63214fa 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -43,6 +43,7 @@
 #include <asm/trace.h>
 #include <asm/firmware.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
 #include <asm/fadump.h>
 
 #include "pseries.h"
@@ -257,6 +258,7 @@ static void pSeries_lpar_hptab_clear(void)
 *
 * This is also called on boot when a fadump happens. In that case we
 * must not change the exception endian mode.
+*
 */
if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) {
long rc;
@@ -267,8 +269,14 @@ static void pSeries_lpar_hptab_clear(void)
 * out to the user, but at least this will stop us from
 * continuing on further and creating an even more
 * difficult to debug situation.
+*
+* But if we are reaching here after a crash, there is no point
+* panicking. Also, in the kdump path, resetting the endianness may
+* not be needed, as the crashkernel is most of the time of the same
+* endianness as the primary kernel. So let's ignore the failure and
+* try kdump'ing anyway.
 */
-   if (rc)
+   if (rc && !is_kdump_path())
panic("Could not enable big endian exceptions");
}
 #endif


[PATCH v3 0/3] powerpc/pstore: Add pstore support for nvram partitions

2014-12-24 Thread Hari Bathini
This patch series adds pstore support on powernv platform to
read different nvram partitions and write compressed data to
oops-log nvram partition. As pseries platform already has
pstore support, this series moves most of the common code
for pseries and powernv platforms to a common file. Tested
the patches successfully on both pseries and powernv
platforms.

Changes from v2:
Added an empty version of clobbering_unread_rtas_event()
routine for !PSERIES, to avoid ifdef at the call sites

---

Hari Bathini (3):
  powerpc/nvram: move generic code for nvram and pstore
  pstore: Add pstore type id for firmware partition
  pstore: add pstore support on powernv


 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |4 
 arch/powerpc/kernel/nvram_64.c  |  677 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  665 ---
 fs/pstore/inode.c   |3 
 include/linux/pstore.h  |1 
 7 files changed, 749 insertions(+), 661 deletions(-)

--
- Hari


[PATCH v3 1/3] powerpc/nvram: move generic code for nvram and pstore

2014-12-24 Thread Hari Bathini
With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/nvram.h   |   50 ++
 arch/powerpc/include/asm/rtas.h|4 
 arch/powerpc/kernel/nvram_64.c |  656 
 arch/powerpc/platforms/pseries/nvram.c |  665 
 4 files changed, 714 insertions(+), 661 deletions(-)

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe..09a518b 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+   __be32 error_type;
+   __be32 seq_num;
+};
+
+struct nvram_os_partition {
+   const char *name;
+   int req_size;   /* desired size, in bytes */
+   int min_size;   /* minimum acceptable size (0 means req_size) */
+   long size;  /* size of data portion (excluding err_log_info) */
+   long index; /* offset of data portion of partition */
+   bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+   __be16 version;
+   __be16 report_length;
+   __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 unsigned int err_type, unsigned int 
err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void  pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern voidnvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+   int length, unsigned int *err_type,
+   unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+   char *buff, int length,
+   unsigned int err_type,
+   unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..bcf6693 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,8 +343,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
+#else
+int clobbering_unread_rtas_event(void) { return 0; }
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b..42e5c6a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,659 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+   .name = "ibm,rtas-log",
+   .req_size = 2079,
+   .min_size = 1055,
+   .index = -1,
+   .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+   .name = "lnx,oops-log",
+   .req_size = 4000,
+   .min_size = 2000,
+   .index = -1,
+   .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+   "ibm,rtas-log",
+#endif
+   "lnx,oops-log",
+   NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+   .dump

[PATCH v3 2/3] pstore: Add pstore type id for firmware partition

2014-12-24 Thread Hari Bathini
This patch adds a pstore type id to be used for opal specific
nvram partitions.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 fs/pstore/inode.c  |3 +++
 include/linux/pstore.h |1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 5041660..8e0c009 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -359,6 +359,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
sprintf(name, powerpc-common-%s-%lld, psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+   sprintf(name, powerpc-opal-%s-%lld, psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, unknown-%s-%lld, psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
 };
 


[PATCH v3 3/3] pstore: add pstore support on powernv

2014-12-24 Thread Hari Bathini
This patch extends pstore, a generic interface to platform-dependent
persistent storage, to the powernv platform, to capture certain useful
information during a system's dying moments. Such support is already in
place for the pseries platform. This patch re-uses most of that code.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/nvram_64.c  |   25 +++--
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 42e5c6a..293da88 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -127,6 +127,14 @@ static size_t oops_data_sz;
 static struct z_stream_s stream;
 
 #ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+   .name = "ibm,skiboot",
+   .index = -1,
+   .os_partition = false
+};
+#endif
+
 #ifdef CONFIG_PPC_PSERIES
 static struct nvram_os_partition of_config_partition = {
.name = "of-config",
@@ -477,6 +485,16 @@ static ssize_t nvram_pstore_read(u64 *id, enum 
pstore_type_id *type,
time->tv_nsec = 0;
break;
 #endif
+#ifdef CONFIG_PPC_POWERNV
+   case PSTORE_TYPE_PPC_OPAL:
+   sig = NVRAM_SIG_FW;
+   part = &skiboot_partition;
+   *type = PSTORE_TYPE_PPC_OPAL;
+   *id = PSTORE_TYPE_PPC_OPAL;
+   time->tv_sec = 0;
+   time->tv_nsec = 0;
+   break;
+#endif
default:
return 0;
}
@@ -552,8 +570,11 @@ static int nvram_pstore_init(void)
 {
int rc = 0;
 
-   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
-   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   if (machine_is(pseries)) {
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   } else
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
 
nvram_pstore_info.buf = oops_data;
nvram_pstore_info.bufsize = oops_data_sz;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c 
b/arch/powerpc/platforms/powernv/opal-nvram.c
index f9896fd..9db4398 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 
 #include <asm/opal.h>
+#include <asm/nvram.h>
 #include <asm/machdep.h>
 
 static unsigned int nvram_size;
@@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, 
loff_t *index)
return count;
 }
 
+static int __init opal_nvram_init_log_partitions(void)
+{
+   /* Scan nvram for partitions */
+   nvram_scan_partitions();
+   nvram_init_oops_partition(0);
+   return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
 void __init opal_nvram_init(void)
 {
struct device_node *np;


[PATCH v4 2/3] pstore: Add pstore type id for PPC64 opal nvram partition

2015-01-30 Thread Hari Bathini
This patch adds a new PPC64 pstore type id to be used for the opal-specific
nvram partition. A new type is needed, as none of the existing types matches
this partition.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
Cc: Anton Vorontsov an...@enomsg.org
Cc: Colin Cross ccr...@android.com
Cc: Kees Cook keesc...@chromium.org
Cc: Tony Luck tony.l...@intel.com
---
 fs/pstore/inode.c  |3 +++
 include/linux/pstore.h |1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 5041660..8e0c009 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -359,6 +359,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
sprintf(name, "powerpc-common-%s-%lld", psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+   sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
 };
 


[PATCH v4 3/3] pstore: add pstore support on powernv

2015-01-30 Thread Hari Bathini
This patch extends pstore, a generic interface to platform-dependent
persistent storage, to the powernv platform, to capture certain useful
information during a system's dying moments. Such support is already in
place for the pseries platform. This patch re-uses most of that code.

It is common practice to compile kernels with both CONFIG_PPC_PSERIES=y
and CONFIG_PPC_POWERNV=y. The code in the nvram_init_oops_partition()
routine still works as intended, as the caller is platform-specific code
which passes the appropriate value for the rtas_partition_exists parameter.
In all other places where the CONFIG_PPC_PSERIES or CONFIG_PPC_POWERNV
flag is used in this patch set, it only reduces the kernel size in
configurations where the flag is not set and has no impact on the logic.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
Cc: Anton Vorontsov an...@enomsg.org
Cc: Colin Cross ccr...@android.com
Cc: Kees Cook keesc...@chromium.org
Cc: Tony Luck tony.l...@intel.com
---
 arch/powerpc/kernel/nvram_64.c  |   25 +++--
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 42e5c6a..293da88 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -127,6 +127,14 @@ static size_t oops_data_sz;
 static struct z_stream_s stream;
 
 #ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+   .name = "ibm,skiboot",
+   .index = -1,
+   .os_partition = false
+};
+#endif
+
 #ifdef CONFIG_PPC_PSERIES
 static struct nvram_os_partition of_config_partition = {
.name = "of-config",
@@ -477,6 +485,16 @@ static ssize_t nvram_pstore_read(u64 *id, enum 
pstore_type_id *type,
time->tv_nsec = 0;
break;
 #endif
+#ifdef CONFIG_PPC_POWERNV
+   case PSTORE_TYPE_PPC_OPAL:
+   sig = NVRAM_SIG_FW;
+   part = &skiboot_partition;
+   *type = PSTORE_TYPE_PPC_OPAL;
+   *id = PSTORE_TYPE_PPC_OPAL;
+   time->tv_sec = 0;
+   time->tv_nsec = 0;
+   break;
+#endif
default:
return 0;
}
@@ -552,8 +570,11 @@ static int nvram_pstore_init(void)
 {
int rc = 0;
 
-   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
-   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   if (machine_is(pseries)) {
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   } else
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
 
nvram_pstore_info.buf = oops_data;
nvram_pstore_info.bufsize = oops_data_sz;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c 
b/arch/powerpc/platforms/powernv/opal-nvram.c
index f9896fd..9db4398 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 
 #include <asm/opal.h>
+#include <asm/nvram.h>
 #include <asm/machdep.h>
 
 static unsigned int nvram_size;
@@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, 
loff_t *index)
return count;
 }
 
+static int __init opal_nvram_init_log_partitions(void)
+{
+   /* Scan nvram for partitions */
+   nvram_scan_partitions();
+   nvram_init_oops_partition(0);
+   return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
 void __init opal_nvram_init(void)
 {
struct device_node *np;


[PATCH v4 1/3] powerpc/nvram: move generic code for nvram and pstore

2015-01-30 Thread Hari Bathini
With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/nvram.h   |   50 ++
 arch/powerpc/include/asm/rtas.h|4 
 arch/powerpc/kernel/nvram_64.c |  656 
 arch/powerpc/platforms/pseries/nvram.c |  665 
 4 files changed, 714 insertions(+), 661 deletions(-)

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe..09a518b 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+   __be32 error_type;
+   __be32 seq_num;
+};
+
+struct nvram_os_partition {
+   const char *name;
+   int req_size;   /* desired size, in bytes */
+   int min_size;   /* minimum acceptable size (0 means req_size) */
+   long size;  /* size of data portion (excluding err_log_info) */
+   long index; /* offset of data portion of partition */
+   bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+   __be16 version;
+   __be16 report_length;
+   __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 unsigned int err_type, unsigned int 
err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void  pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern voidnvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+   int length, unsigned int *err_type,
+   unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+   char *buff, int length,
+   unsigned int err_type,
+   unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..123d7ff 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,8 +343,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
+#else
+static inline int clobbering_unread_rtas_event(void) { return 0; }
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b..42e5c6a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,659 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+   .name = "ibm,rtas-log",
+   .req_size = 2079,
+   .min_size = 1055,
+   .index = -1,
+   .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+   .name = "lnx,oops-log",
+   .req_size = 4000,
+   .min_size = 2000,
+   .index = -1,
+   .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+   "ibm,rtas-log",
+#endif
+   "lnx,oops-log",
+   NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper

[PATCH v4 0/3] powerpc/pstore: Add pstore support for nvram partitions

2015-01-30 Thread Hari Bathini
This patch series adds pstore support on powernv platform to
read different nvram partitions and write compressed data to
oops-log nvram partition. As pseries platform already has
pstore support, this series moves most of the common code
for pseries and powernv platforms to a common file. Tested
the patches successfully on both pseries and powernv
platforms. Also, tested the patches successfully on a kernel compiled
with both CONFIG_PPC_PSERIES=y and CONFIG_PPC_POWERNV=y.

Changes from v3:
1. Updated the changelog
2. Resolved compile issues with !CONFIG_PPC_PSERIES

---

Hari Bathini (3):
  powerpc/nvram: move generic code for nvram and pstore
  pstore: Add pstore type id for PPC64 opal nvram partition
  pstore: add pstore support on powernv


 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |4 
 arch/powerpc/kernel/nvram_64.c  |  677 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  665 ---
 fs/pstore/inode.c   |3 
 include/linux/pstore.h  |1 
 7 files changed, 749 insertions(+), 661 deletions(-)

--
- Hari


Re: [PATCH v3 1/3] powerpc/nvram: move generic code for nvram and pstore

2015-01-14 Thread Hari Bathini

On 01/14/2015 10:01 AM, Michael Ellerman wrote:

On Wed, 2014-12-24 at 17:28 +0530, Hari Bathini wrote:

With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

As I said in my reply to the previous version:

 ... you need to keep in mind that it is very common for us to build kernels
 with both POWERNV=y and PSERIES=y.
 
 So you need to make sure you're only using CONFIG_PPC_PSERIES to protect things

 that are optional on pseries. Not things that we *shouldn't* be doing on
 powernv.


we could as well do away with the PPC_PSERIES flag in a couple of places in
arch/powerpc/kernel/nvram_64.c, but doing that would unnecessarily add a
few extra variables for the !PPC_PSERIES case.


Please explain in your commit message how you have dealt with that.



Sure. Will update the changelog


Also, you broke the build for every config that doesn't have
CONFIG_PPC_PSERIES, all 95 of them. This is pasemi_defconfig for example:


My bad! clobbering_unread_rtas_event should have been static inline
when defined under !PPC_PSERIES.
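
For reference, the corrected !CONFIG_PPC_PSERIES stub, exactly as it appears
in the reposted series (arch/powerpc/include/asm/rtas.h), is:

#ifdef CONFIG_PPC_PSERIES
extern int clobbering_unread_rtas_event(void);
#else
static inline int clobbering_unread_rtas_event(void) { return 0; }
#endif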

Thanks
Hari


   LD  arch/powerpc/mm/built-in.o
 arch/powerpc/mm/init_64.o: In function `clobbering_unread_rtas_event':
 init_64.c:(.opd+0x48): multiple definition of 
`clobbering_unread_rtas_event'
 arch/powerpc/mm/mem.o:mem.c:(.opd+0x90): first defined here
 arch/powerpc/mm/init_64.o: In function `.clobbering_unread_rtas_event':
 init_64.c:(.text+0x80): multiple definition of 
`.clobbering_unread_rtas_event'
 arch/powerpc/mm/mem.o:mem.c:(.text+0x2c0): first defined here
   CC  arch/powerpc/kernel/udbg.o
 /home/kisskb/slave/src/scripts/Makefile.build:336: recipe for target 
'arch/powerpc/mm/built-in.o' failed
 make[2]: *** [arch/powerpc/mm/built-in.o] Error 1
 /home/kisskb/slave/src/Makefile:938: recipe for target 'arch/powerpc/mm' 
failed
 make[1]: *** [arch/powerpc/mm] Error 2
 make[1]: *** Waiting for unfinished jobs


cheers




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 1/3] powerpc/nvram: move generic code for nvram and pstore

2015-01-16 Thread Hari Bathini

On 01/15/2015 03:58 AM, Michael Ellerman wrote:

On Wed, 2015-01-14 at 23:35 +0530, Hari Bathini wrote:

On 01/14/2015 10:01 AM, Michael Ellerman wrote:


On Wed, 2014-12-24 at 17:28 +0530, Hari Bathini wrote:

With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

As I said in my reply to the previous version:

 ... you need to keep in mind that it is very common for us to build kernels
 with both POWERNV=y and PSERIES=y.
 
 So you need to make sure you're only using CONFIG_PPC_PSERIES to protect things

 that are optional on pseries. Not things that we *shouldn't* be doing on
 powernv.

we could as well do away with the PPC_PSERIES flag in a couple of
places in
arch/powerpc/kernel/nvram_64.c, but doing that would unnecessarily add a
few extra variables for the !PPC_PSERIES case.

Yep. I'm happy for them to be there, I just want you to explain in the
changelog that you've thought about the PSERIES=y POWERNV=y case and why the
code makes sense for that configuration.


Please explain in your commit message how you have dealt with that.

Sure. Will update the changelog

Thanks.
  

Also, you broke the build for every config that doesn't have
CONFIG_PPC_PSERIES, all 95 of them. This is pasemi_defconfig for example:

My bad! clobbering_unread_rtas_event should have been static inline
when defined under !PPC_PSERIES.

Correct.

Please make sure you test build at least some of the other configurations in
future. I realise it's too time consuming to build all of them, but ideally for
every config symbol you use in your patch you need to build a kernel config
where that symbol =y and =n (and =m if it's tristate).


Sure, Michael. I will keep this in mind :)

Thanks
Hari


cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



Re: [PATCH v4 1/3] powerpc/nvram: move generic code for nvram and pstore

2015-02-05 Thread Hari Bathini

On 01/30/2015 10:12 PM, Arnd Bergmann wrote:

On Friday 30 January 2015 20:44:00 Hari Bathini wrote:

With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com

Can you make this y2038-safe in the process, possibly as a
follow-up patch?


Arnd, sorry for the delayed response.
I will add these changes to this patch-set and re-spin..

Thanks
Hari


+extern unsigned long last_rtas_event;

time64_t


+	}
+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(zipped_len);
+	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	return 0;

ktime_get_real_seconds()


+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+   int *count, struct timespec *time, char **buf,
+   bool *compressed, struct pstore_info *psi)

This has to remain timespec for now but can later be changed to timespec64
when the API gets changed.


+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(text_len);
+	oops_hdr->timestamp = cpu_to_be64(get_seconds());

ktime_get_real_seconds()

Arnd
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



[PATCH v5 2/4] pstore: Add pstore type id for PPC64 opal nvram partition

2015-02-05 Thread Hari Bathini
This patch adds a new PPC64 partition type to be used for opal
specific nvram partition. A new partition type is needed as none
of the existing types matches this partition type.
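
For context (a sketch: the id value comes from the enum added below, and the
backend registers itself with pstore under the name "nvram"), records read
back from the OPAL partition would show up in the pstore filesystem roughly
as:

  /sys/fs/pstore/powerpc-opal-nvram-7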

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 fs/pstore/inode.c  |3 +++
 include/linux/pstore.h |1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 5041660..8e0c009 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -359,6 +359,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
		sprintf(name, "powerpc-common-%s-%lld", psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+	case PSTORE_TYPE_PPC_OPAL:
+		sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
		sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
 };
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 4/4] powerpc: make timestamp related code y2038-safe

2015-02-05 Thread Hari Bathini
While we are here, let us make timestamp related code
y2038-safe.
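
The substance of the conversion is just switching the time accessor:
get_seconds() returns unsigned long, which a 32-bit kernel cannot represent
beyond January 2038, while ktime_get_real_seconds() returns time64_t on every
architecture, so the 64-bit big-endian timestamp field stays correct. A
representative hunk (the patch below repeats this in a few places):

-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());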

Suggested-by: Arnd Bergmann a...@arndb.de
Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/rtas.h|3 ++-
 arch/powerpc/kernel/nvram_64.c |6 +++---
 arch/powerpc/platforms/pseries/nvram.c |   10 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 123d7ff..efa9152 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -4,6 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/page.h>
+#include <linux/time.h>
 
 /*
  * Definitions for talking to the RTAS on CHRP machines.
@@ -343,7 +344,7 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
-extern unsigned long last_rtas_event;
+extern time64_t last_rtas_event;
 extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 293da88..1e703f8 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -376,7 +376,7 @@ static int zip_oops(size_t text_len)
}
 	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
 	oops_hdr->report_length = cpu_to_be16(zipped_len);
-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
return 0;
 }
 
@@ -423,7 +423,7 @@ static int nvram_pstore_write(enum pstore_type_id type,
 
 	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
 	oops_hdr->report_length = cpu_to_be16(size);
-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
 
if (compressed)
err_type = ERR_TYPE_KERNEL_PANIC_GZ;
@@ -721,7 +721,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
err_type = ERR_TYPE_KERNEL_PANIC;
 	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
 	oops_hdr->report_length = cpu_to_be16(text_len);
-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
}
 
	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
diff --git a/arch/powerpc/platforms/pseries/nvram.c 
b/arch/powerpc/platforms/pseries/nvram.c
index 97b8fc6..d77713b 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -37,10 +37,10 @@ static DEFINE_SPINLOCK(nvram_lock);
 
 /* See clobbering_unread_rtas_event() */
 #define NVRAM_RTAS_READ_TIMEOUT 5  /* seconds */
-static unsigned long last_unread_rtas_event;   /* timestamp */
+static time64_t last_unread_rtas_event;/* timestamp */
 
 #ifdef CONFIG_PSTORE
-unsigned long last_rtas_event;
+time64_t last_rtas_event;
 #endif
 
 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
@@ -145,9 +145,9 @@ int nvram_write_error_log(char * buff, int length,
	int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
err_type, error_log_cnt);
if (!rc) {
-   last_unread_rtas_event = get_seconds();
+   last_unread_rtas_event = ktime_get_real_seconds();
 #ifdef CONFIG_PSTORE
-   last_rtas_event = get_seconds();
+   last_rtas_event = ktime_get_real_seconds();
 #endif
}
 
@@ -201,7 +201,7 @@ int clobbering_unread_rtas_event(void)
 {
	return (oops_log_partition.index == rtas_log_partition.index
		&& last_unread_rtas_event
-		&& get_seconds() - last_unread_rtas_event <=
+		&& ktime_get_real_seconds() - last_unread_rtas_event <=
			NVRAM_RTAS_READ_TIMEOUT);
 }
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 0/4] powerpc/pstore: Add pstore support for nvram partitions

2015-02-05 Thread Hari Bathini
This patch series adds pstore support on powernv platform to
read different nvram partitions and write compressed data to
oops-log nvram partition. As pseries platform already has
pstore support, this series moves most of the common code
for pseries and powernv platforms to a common file. Tested
the patches successfully on both pseries and powernv
platforms. Also, tested the patches successfully on a kernel
compiled with both CONFIG_PPC_PSERIES=y && CONFIG_PPC_POWERNV=y.
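
As a usage sketch (paths and file names are illustrative), on a powernv
machine the result can be checked through the pstore filesystem after boot:

  # mount -t pstore pstore /sys/fs/pstore
  # ls /sys/fs/pstore
  dmesg-nvram-1  powerpc-opal-nvram-7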

Changes from v4:
1. Added a patch for y2038-safe code changes

---

Hari Bathini (4):
  powerpc/nvram: move generic code for nvram and pstore
  pstore: Add pstore type id for PPC64 opal nvram partition
  pstore: add pstore support on powernv
  powerpc: make timestamp related code y2038-safe


 arch/powerpc/include/asm/nvram.h|   50 ++
 arch/powerpc/include/asm/rtas.h |5 
 arch/powerpc/kernel/nvram_64.c  |  677 +++
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 
 arch/powerpc/platforms/pseries/nvram.c  |  673 ---
 fs/pstore/inode.c   |3 
 include/linux/pstore.h  |1 
 7 files changed, 754 insertions(+), 665 deletions(-)

--
-Hari

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 1/4] powerpc/nvram: move generic code for nvram and pstore

2015-02-05 Thread Hari Bathini
With minor checks, we can move most of the code for nvram
under pseries to a common place to be re-used by other
powerpc platforms like powernv. This patch moves such
common code to arch/powerpc/kernel/nvram_64.c file.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/nvram.h   |   50 ++
 arch/powerpc/include/asm/rtas.h|4 
 arch/powerpc/kernel/nvram_64.c |  656 
 arch/powerpc/platforms/pseries/nvram.c |  665 
 4 files changed, 714 insertions(+), 661 deletions(-)

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe..09a518b 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+   __be32 error_type;
+   __be32 seq_num;
+};
+
+struct nvram_os_partition {
+   const char *name;
+   int req_size;   /* desired size, in bytes */
+   int min_size;   /* minimum acceptable size (0 means req_size) */
+   long size;  /* size of data portion (excluding err_log_info) */
+   long index; /* offset of data portion of partition */
+   bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+   __be16 version;
+   __be16 report_length;
+   __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 unsigned int err_type, unsigned int 
err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void  pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern void	nvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+   int length, unsigned int *err_type,
+   unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+   char *buff, int length,
+   unsigned int err_type,
+   unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..123d7ff 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -343,8 +343,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern unsigned long last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
+#else
+static inline int clobbering_unread_rtas_event(void) { return 0; }
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b..42e5c6a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,659 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+   .name = "ibm,rtas-log",
+   .req_size = 2079,
+   .min_size = 1055,
+   .index = -1,
+   .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+   .name = "lnx,oops-log",
+   .req_size = 4000,
+   .min_size = 2000,
+   .index = -1,
+   .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+   "ibm,rtas-log",
+#endif
+   "lnx,oops-log",
+   NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper

[PATCH v5 3/4] pstore: add pstore support on powernv

2015-02-05 Thread Hari Bathini
This patch extends pstore, a generic interface to platform dependent
persistent storage, to the powernv platform, to capture certain useful
information during dying moments. Such support is already in place for
the pseries platform. This patch re-uses most of that code.

It is a common practice to compile kernels with both CONFIG_PPC_PSERIES=y
and CONFIG_PPC_POWERNV=y. The code in nvram_init_oops_partition() routine
still works as intended, as the caller is platform specific code which
passes the appropriate value for rtas_partition_exists parameter.
In all other places where the CONFIG_PPC_PSERIES or CONFIG_PPC_POWERNV
flag is used in this patchset, it is only to reduce the kernel size in cases
where the flag is not set, and has no impact logic-wise.
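
For comparison, the pseries side (paraphrased from the existing init path in
arch/powerpc/platforms/pseries/nvram.c) passes whether the RTAS log partition
was actually set up, which is what keeps the shared routine correct on a
PSERIES=y && POWERNV=y kernel:

	/* pseries: tell the shared oops-partition init whether an
	 * ibm,rtas-log partition exists on this machine */
	rc = nvram_init_os_partition(&rtas_log_partition);
	nvram_init_oops_partition(rc == 0);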

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/nvram_64.c  |   25 +++--
 arch/powerpc/platforms/powernv/opal-nvram.c |   10 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 42e5c6a..293da88 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -127,6 +127,14 @@ static size_t oops_data_sz;
 static struct z_stream_s stream;
 
 #ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+   .name = "ibm,skiboot",
+   .index = -1,
+   .os_partition = false
+};
+#endif
+
 #ifdef CONFIG_PPC_PSERIES
 static struct nvram_os_partition of_config_partition = {
	.name = "of-config",
@@ -477,6 +485,16 @@ static ssize_t nvram_pstore_read(u64 *id, enum 
pstore_type_id *type,
 		time->tv_nsec = 0;
break;
 #endif
+#ifdef CONFIG_PPC_POWERNV
+	case PSTORE_TYPE_PPC_OPAL:
+		sig = NVRAM_SIG_FW;
+		part = &skiboot_partition;
+		*type = PSTORE_TYPE_PPC_OPAL;
+		*id = PSTORE_TYPE_PPC_OPAL;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+#endif
default:
return 0;
}
@@ -552,8 +570,11 @@ static int nvram_pstore_init(void)
 {
int rc = 0;
 
-   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
-   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   if (machine_is(pseries)) {
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+   nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+   } else
+   nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
 
nvram_pstore_info.buf = oops_data;
nvram_pstore_info.bufsize = oops_data_sz;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c 
b/arch/powerpc/platforms/powernv/opal-nvram.c
index f9896fd..9db4398 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 
 #include <asm/opal.h>
+#include <asm/nvram.h>
 #include <asm/machdep.h>
 
 static unsigned int nvram_size;
@@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, 
loff_t *index)
return count;
 }
 
+static int __init opal_nvram_init_log_partitions(void)
+{
+   /* Scan nvram for partitions */
+   nvram_scan_partitions();
+   nvram_init_oops_partition(0);
+   return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
 void __init opal_nvram_init(void)
 {
struct device_node *np;

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 2/4] pstore: Add pstore type id for PPC64 opal nvram partition

2015-03-04 Thread Hari Bathini

On 02/06/2015 01:06 AM, Hari Bathini wrote:

This patch adds a new PPC64 partition type to be used for opal
specific nvram partition. A new partition type is needed as none
of the existing types matches this partition type.

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com


This patch series is reviewed by Kees.
Reference link: https://lkml.org/lkml/2015/2/5/651

Reviewed-by: Kees Cook keesc...@chromium.org

Thanks
Hari


---
  fs/pstore/inode.c  |3 +++
  include/linux/pstore.h |1 +
  2 files changed, 4 insertions(+)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 5041660..8e0c009 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -359,6 +359,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, 
u64 id, int count,
case PSTORE_TYPE_PPC_COMMON:
		sprintf(name, "powerpc-common-%s-%lld", psname, id);
break;
+   case PSTORE_TYPE_PPC_OPAL:
+		sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+   break;
case PSTORE_TYPE_UNKNOWN:
		sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ece0c6b..af44980 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,6 +39,7 @@ enum pstore_type_id {
PSTORE_TYPE_PPC_RTAS= 4,
PSTORE_TYPE_PPC_OF  = 5,
PSTORE_TYPE_PPC_COMMON  = 6,
+   PSTORE_TYPE_PPC_OPAL= 7,
PSTORE_TYPE_UNKNOWN = 255
  };
  


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



[PATCH] nvram: print no error message when nvram is not set as pstore backend

2015-05-11 Thread Hari Bathini
Pstore only supports one backend at a time. The preferred
pstore backend is set by passing the pstore.backend=name
argument to the kernel at boot time. Currently, while trying
to register with pstore, nvram throws an error message even
when pstore.backend != nvram, which is unnecessary. This
patch removes the error message in case pstore.backend != nvram.
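
For example (a sketch, assuming the backend registers itself under the name
"nvram" as it does here): booting with pstore.backend=nvram keeps registration
and its error reporting as before, while booting with any other backend name
makes pstore_register() return -EPERM, which this patch now treats as the
expected, silent case:

  pstore.backend=nvram  -> nvram registers; real failures are still reported
  pstore.backend=efi    -> pstore_register() returns -EPERM; nothing is printed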

Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/nvram_64.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 1e703f8..bfdbcab 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -582,9 +582,10 @@ static int nvram_pstore_init(void)
 	spin_lock_init(&nvram_pstore_info.buf_lock);
 
 	rc = pstore_register(&nvram_pstore_info);
-	if (rc != 0)
-		pr_err("nvram: pstore_register() failed, defaults to "
-				"kmsg_dump; returned %d\n", rc);
+	if (rc && (rc != -EPERM))
+		/* Print error only when pstore.backend == nvram */
+		pr_err("nvram: pstore_register() failed, returned %d. "
+				"Defaults to kmsg_dump\n", rc);
 
return rc;
 }

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: on crash, kexec'ed kernel needs all CPUs are online

2015-11-05 Thread Hari Bathini

On 11/05/2015 07:02 AM, David Gibson wrote:

On Wed, 4 Nov 2015 14:54:51 +0100
Laurent Vivier <lviv...@redhat.com> wrote:



On 04/11/2015 13:34, Hari Bathini wrote:

On 10/16/2015 12:30 AM, Laurent Vivier wrote:

On kexec, all secondary offline CPUs are onlined before
starting the new kernel, this is not done in the case of kdump.

If kdump is configured and a kernel crash occurs while
some secondary CPUs are offline (SMT=off),
the new kernel is not able to start them and displays some
"Processor X is stuck.".

Starting with POWER8, subcore logic relies on all threads of
core being booted. So, on startup kernel tries to start all
threads, and asks OPAL (or RTAS) to start all CPUs (including
threads). If a CPU has been offlined by the previous kernel,
it has not been returned to OPAL, and thus OPAL cannot restart
it: this CPU has been lost...

Signed-off-by: Laurent Vivier<lviv...@redhat.com>


Hi Laurent,

Hi Hari,


Sorry for jumping too late into this.

better late than never :)


Are you seeing this issue even with the below patches:

pseries:
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c1caae3de46a072d0855729aed6e793e536a4a55

Unfortunately, this is unlikely to be relevant - this fixes a failure
while setting up the kexec.  The problem we see occurs once we've
booted the second kernel and it's attempting to bring up secondary CPUs.


opal/powernv:
https://github.com/open-power/skiboot/commit/9ee56b5

Very interesting. Is there a way to have a firmware with the fix ?

 From Laurent's analysis of the crash, I don't think this will be




relevant either, but I'm not sure.  It would be very interesting to
know which (if any) released firmwares include this patch so we can
test it.


Hi Laurent/David,

I am not so sure about this. While I get back on this, can you confirm that you
are seeing the issue on both PowerVM (pseries) and baremetal (powernv)? What is
the kernel version where the issue is seen for PowerVM and/or baremetal?
Also, for baremetal, can you mention the OPAL version on which the issue is
reproducible? If a bug has been raised for this, I would be happy to be
pointed to it, to get more information.

Thanks
Hari




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



Re: [PATCH] powerpc: on crash, kexec'ed kernel needs all CPUs are online

2015-11-04 Thread Hari Bathini

On 10/16/2015 12:30 AM, Laurent Vivier wrote:

On kexec, all secondary offline CPUs are onlined before
starting the new kernel, this is not done in the case of kdump.

If kdump is configured and a kernel crash occurs while
some secondary CPUs are offline (SMT=off),
the new kernel is not able to start them and displays some
"Processor X is stuck.".

Starting with POWER8, subcore logic relies on all threads of
core being booted. So, on startup kernel tries to start all
threads, and asks OPAL (or RTAS) to start all CPUs (including
threads). If a CPU has been offlined by the previous kernel,
it has not been returned to OPAL, and thus OPAL cannot restart
it: this CPU has been lost...

Signed-off-by: Laurent Vivier



Hi Laurent,

Sorry for jumping too late into this.
Are you seeing this issue even with the below patches:

pseries:
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c1caae3de46a072d0855729aed6e793e536a4a55

opal/powernv:
https://github.com/open-power/skiboot/commit/9ee56b5

Thanks
Hari


---
  arch/powerpc/kernel/crash.c | 20 
  1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 51dbace..3ca9452 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -19,6 +19,7 @@
  #include 
  #include 
  #include 
+#include <linux/cpu.h>
  
  #include 

  #include 
@@ -299,11 +300,30 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
  }
  EXPORT_SYMBOL(crash_shutdown_unregister);
  
+/*

+ * The next kernel will try to start all secondary CPUs and if
+ * they are not online it will fail to start them.
+ *
+ */
+static void wake_offline_cpus(void)
+{
+   int cpu = 0;
+
+   for_each_present_cpu(cpu) {
+   if (!cpu_online(cpu)) {
+   pr_info("kexec: Waking offline cpu %d.\n", cpu);
+   cpu_up(cpu);
+   }
+   }
+}
+
  void default_machine_crash_shutdown(struct pt_regs *regs)
  {
unsigned int i;
int (*old_handler)(struct pt_regs *regs);
  
+	wake_offline_cpus();

+
/*
 * This function is only called after the system
 * has panicked or is otherwise in a critical state.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-06-22 Thread Hari Bathini
Currently, memory for fadump can be specified with fadump_reserve_mem=size,
where only a fixed size can be specified. Add the below syntax as well, to
support conditional reservation based on system memory size:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

This syntax helps using the same commandline parameter for different system
memory sizes.
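
For instance (sizes below are made up for illustration), a single command line
such as:

  fadump=on fadump_reserve_mem=4G-16G:512M,16G-64G:1G,64G-:2G

would reserve 512M on an 8G system, 1G on a 40G system and 2G on anything with
64G or more, following the same range-matching rules as the crashkernel=
parameter.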

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mah...@linux.vnet.ibm.com>
---
Changes in v2:
1. Changed subject from "[PATCH v2 2/2] powerpc/fadump: add support to parse 
size based on memory range".
2. Rebased to latest upstream.

 arch/powerpc/kernel/fadump.c |   64 --
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3cb3b02a..e435828 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,56 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
 }
 
+/*
+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *name = "fadump_reserve_mem=";
+   char *fadump_cmdline = NULL, *cur;
+
+   fw_dump.reserve_bootvar = 0;
+
+   /* find fadump_reserve_mem and use the last one if there are many */
+   cur = strstr(boot_command_line, name);
+   while (cur) {
+   fadump_cmdline = cur;
+   cur = strstr(cur+1, name);
+   }
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   fadump_cmdline += strlen(name);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!is_param_range_based(fadump_cmdline)) {
+   fw_dump.reserve_bootvar = memparse(fadump_cmdline, NULL);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   cur = fadump_cmdline;
+   fw_dump.reserve_bootvar = parse_mem_range_size("fadump_reserve_mem",
+					&cur, memblock_phys_mem_size());
+   if (cur == fadump_cmdline) {
+		printk(KERN_INFO "fadump_reserve_mem: Invalid syntax!\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -212,12 +262,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
 {
unsigned long size;
 
+   /* sets fw_dump.reserve_bootvar */
+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -348,15 +403,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-		fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/2] powerpc/fadump: support memory range syntax for fadump memory reservation

2016-06-22 Thread Hari Bathini
This patchset adds support to input system memory range based memory size
for fadump reservation. The crashkernel parameter already supports such
syntax. The first patch refactors the parsing code of crashkernel parameter
for reuse. The second patch uses the newly refactored parsing code to reserve
memory for fadump based on system memory size.

---

Hari Bathini (2):
  refactor code parsing size based on memory range
  powerpc/fadump: parse fadump reserve memory size based on memory range


 arch/powerpc/kernel/fadump.c |   64 
 include/linux/kernel.h   |5 ++
 kernel/kexec_core.c  |   63 ++--
 kernel/params.c  |   96 ++
 4 files changed, 161 insertions(+), 67 deletions(-)

--
Hari Bathini

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/2] refactor code parsing size based on memory range

2016-06-22 Thread Hari Bathini
Currently, crashkernel parameter supports the below syntax to parse size
based on memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for crashkernel parameter, it applies to
other parameters with similar syntax. So, move this code to a more generic
place for code reuse.
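
For reference, this is the kind of string the moved code parses (example values
only, same form as the documented crashkernel syntax):

  crashkernel=512M-2G:64M,2G-:128M

i.e. reserve 64M when system RAM is at least 512M but below 2G, and 128M from
2G upwards; the refactored helper preserves exactly this matching behaviour.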

Cc: Eric Biederman <ebied...@xmission.com>
Cc: Vivek Goyal <vgo...@redhat.com>
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: ke...@lists.infradead.org
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
changes in v2:
1. Rebased to latest upstream.
2. Marked few more people on cc.

 include/linux/kernel.h |5 +++
 kernel/kexec_core.c|   63 +++-
 kernel/params.c|   96 
 3 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 94aa10f..72f55e5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -436,6 +436,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
 extern bool parse_option_str(const char *str, const char *option);
 
+extern bool __init is_param_range_based(const char *cmdline);
+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 56b3ed0..d43f5cc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1083,59 +1083,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
 
/* for each entry of the comma-separated list */
-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-		start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-			end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-		size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+	*crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
 
if (*crash_size > 0) {
while (*cur && *cur != ' ' && *cur != '@')
@@ -1272,7 +1222,6 @@ static int __init __parse_crashkernel(char *cmdline,
 const char *name,
 const char *suffix)
 {
-   char*first_colon, *first_space;
char*ck_cmdline;
 
BUG_ON(!crash_size || !crash_base);
@@ -1290,12 +1239,10 @@ static int __init __parse_crashkernel(char *cmdline,
return parse_crashkernel_suffix(ck_cmdline, crash_size,
suffix);
/*
-* if the commandline contains a ':', then that's the extended
+* if the parameter is range based, then that's the extended
 * syntax -- if not, it must be the classic syntax
 */
-   first_col

Re: [v2,1/2] refactor code parsing size based on memory range

2016-06-24 Thread Hari Bathini



On 06/24/2016 10:56 AM, Michael Ellerman wrote:

On Wed, 2016-22-06 at 19:25:26 UTC, Hari Bathini wrote:

Currently, crashkernel parameter supports the below syntax to parse size
based on memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for crashkernel parameter, it applies to
other parameters with similar syntax. So, move this code to a more generic
place for code reuse.

Cc: Eric Biederman <ebied...@xmission.com>
Cc: Vivek Goyal <vgo...@redhat.com>
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: ke...@lists.infradead.org
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>

Hari, it's not immediately clear that this makes no change to the logic in the
kexec code. Can you reply with a longer change log explaining why the old & new
logic is the same for kexec.



Hi Michael,

Please consider this changelog for this patch:

--
crashkernel parameter supports different syntaxes to specify the amount
of memory to be reserved for kdump kernel. Below is one of the supported
syntaxes that needs parsing to find the memory size to reserve, based on
memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for the crashkernel parameter, it also
applies to other parameters, like fadump_reserve_mem, which could use a
similar syntax. So, to reuse code, move the code that checks whether a
parameter uses this syntax, along with the code that parses the memory size
to reserve for it, to a common place. While the code is moved to the
kernel/params.c file, there is no change in logic for crashkernel parameter
parsing, as the moved code is invoked through function calls at the
appropriate places.
--

Thanks
Hari





diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 94aa10f..72f55e5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -436,6 +436,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
  extern unsigned long long memparse(const char *ptr, char **retptr);
  extern bool parse_option_str(const char *str, const char *option);
  
+extern bool __init is_param_range_based(const char *cmdline);

+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
  extern int core_kernel_text(unsigned long addr);
  extern int core_kernel_data(unsigned long addr);
  extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 56b3ed0..d43f5cc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1083,59 +1083,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
  
  	/* for each entry of the comma-separated list */

-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-		start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-			end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-		size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+	*crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
  
  	if (*crash_size > 0) {

Re: [v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-30 Thread Hari Bathini



On 03/30/2016 04:47 PM, Michael Ellerman wrote:

On Wed, 2016-03-30 at 13:14 +0530, Hari Bathini wrote:

Alternatively, how about moving the OOL handlers that can't be branched to with
LOAD_HANDLER under __end_interrupts? This way we won't be copying more than a
few absolutely needed handlers.

STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
.
.
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)


We can leave __end_handlers marker to indicate code that should be part
of the first 64K of kernel image.

That might work. But I suspect you will run into issues with ".org backwards",
ie. running out of space in head_64.S

But try it and let me know if it works.


It worked. Doing some sanity testing.
Will post v3 soon with this approach.


I think we also need to write a script or little C program which looks at the
vmlinux and checks that nothing below __end_whatever does a direct branch. So
that we don't break it again in future.


Yep. That would make life easy..
Let me see if I can do something about it.

Thanks
Hari

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-30 Thread Hari Bathini
Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors till __end_interrupts marker are copied down to real
address 0x100. So, branching to labels (read OOL handlers) outside this
section should be handled differently (see LOAD_HANDLER()), considering
a relocatable kernel, which would need at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0 (6 instructions) that contains
the part up to the point where the CFAR is saved in the PACA should be
part of the short interrupt vectors before we branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraint. Currently, in these interrupt vectors, we simply branch out to
OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (eg. kdump case). While this has been the case
for some time now and kdump is used widely, we were fortunate not to see any
problems so far, for three reasons:

1. In almost all cases, production kernel (relocatable) is used for
   kdump as well, which would mean that crashed kernel's OOL handler
   would be at the same place where we end up branching to, from the short
   interrupt vector of kdump kernel.
2. Also, OOL handler was unlikely the reason for crash in almost all
   the kdump scenarios, which meant we had a sane OOL handler from
   crashed kernel that we branched to.
3. On most 64-bit POWER server processors, page size is large enough
   that marking interrupt vector code as executable (see commit
   429d2e83) leads to marking OOL handler code from crashed kernel,
   that sits right below interrupt vector code from kdump kernel, as
   executable as well.

Let us fix this undependable code path by moving these OOL handlers below
__end_interrupts marker to make sure we also copy these handlers to real
address 0x100 when running a relocatable kernel. Because the interrupt
vectors branching to these OOL handlers are not long enough to use
LOAD_HANDLER() for branching as discussed above.

This fix has been tested successfully in the kdump scenario, on an LPAR with 4K page
size by using different default/production kernel and kdump kernel.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---

changes from v2:
2. Move the OOL handlers before __end_interrupts marker instead of moving the 
__end_interrupts marker
3. Leave __end_handlers marker as is.

 arch/powerpc/kernel/exceptions-64s.S |   29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..9ac3a38 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -953,6 +953,25 @@ hv_facility_unavailable_relon_trampoline:
 #endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
 
+   /*
+* Out-Of-Line handlers for relocation-on interrupt vectors
+*
+* We need these OOL handlers to be below __end_interrupts
+* marker to ensure we also copy these OOL handlers along
+* with the interrupt vectors to real address 0x100 when
+* running a relocatable kernel. Because the interrupt
+* vectors branching to these OOL handlers are not long
+* enough to use LOAD_HANDLER() for branching.
+*/
+   STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
+   MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
+
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+   STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+
/* Other future vectors */
.align  7
.globl  __end_interrupts
@@ -1234,16 +1253,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
.globl  __end_handlers
 __end_handlers:
 
-   /* Equivalents to the above handlers for relocation-on interrupt 
vectors */
-   STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
-   MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
-
-   STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
-   STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec

[PATCH v4 3/3] ppc64/book3s: remove __end_handlers marker

2016-04-07 Thread Hari Bathini
The __end_handlers marker was intended to mark the end of the code that gets
called from exception prologs. But it hasn't kept pace with code changes.
Case in point: slb_miss_realmode is called from exception prolog code but
isn't below the __end_handlers marker. So, the __end_handlers marker is as
good as a comment, but could be misleading at times if it isn't in sync with
the code, as is the case now. Let us avoid this confusion by having a better
comment and removing the __end_handlers marker altogether.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |   13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index c193ebd..80f9fc4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,11 +764,10 @@ kvmppc_skip_Hinterrupt:
 #endif
 
 /*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above.  Because the prologs assemble the
- * addresses of these handlers using the LOAD_HANDLER macro,
- * which uses an ori instruction, these handlers must be in
- * the first 64k of the kernel image.
+ * Ensure that any handlers that get invoked from the exception prologs
+ * above are below the first 64KB (0x10000) of the kernel image because
+ * the prologs assemble the addresses of these handlers using the
+ * LOAD_HANDLER macro, which uses an ori instruction.
  */
 
 /*** Common interrupt handlers ***/
@@ -1243,10 +1242,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl  vsx_unavailable_exception
b   ret_from_except
 
-   .align  7
-   .globl  __end_handlers
-__end_handlers:
-
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
 /*
  * Data area reserved for FWNMI option.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 2/3] ppc64/book3s: make some room for common interrupt vector code

2016-04-07 Thread Hari Bathini
With the previous patch, we choke out whatever little space is left
below 0x7000 (FWNMI hard block) while there is a hole of ~1400 bytes
below __end_interrupts marker when CONFIG_CBE_RAS is disabled.
Considering CONFIG_CBE_RAS is not enabled by default for BOOK3S, this
is not a desirable scenario especially when we have to worry about
each additional instruction that goes below 0x7000.

Memory region from 0x1800 to 0x4000 is dedicated for common interrupt
vector code. Also, we never hit an interrupt below 0x300 when IR=DR=1
implying memory region between 0x4000 to 0x4300 can also be used for
common interrupt vector code. So, we can effectively use memory region
between 0x1800 to 0x4300 for common interrupt vector code.

This patch tries to free up some space below 0x7000 by rearranging the
common interrupt vector code. The approach here is to avoid large holes
below 0x4300 for any kernel configuration. For this, let us move common
interrupt vector code that only gets enabled with CONFIG_CBE_RAS above
0x8000, as it doesn't need to be too close to the call sites and can be
branched to with LOAD_HANDLER() as long as it is within the first 64KB
(0x10000) of the kernel image. Instead, let us move the common interrupt vector
code marked h_instr_storage_common, facility_unavailable_common &
hv_facility_unavailable_common below 0x4300. This leaves ~250 bytes
free below 0x4300 and ~1150 bytes free below 0x7000 - enough space to
stop worrying about every additional instruction that goes below 0x7000.

This patch assumes at least commit 376af594, part of the patch series
that starts with commit 468a3302, is part of the code to avoid messy
compilation issues like:

relocation truncated to fit: R_PPC64_REL14 against `.text'+1c90
Makefile:864: recipe for target 'vmlinux' failed

I tested this patch successfully on ppc64 and ppc64le LPARs and in baremetal
environments. I couldn't test it on an IBM Cell blade, but I expect no
problems with this patch in the Cell blade environment as well. If
someone can test this patch on the Cell platform, that would be great.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |   20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f76b2f3..c193ebd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -786,6 +786,7 @@ kvmppc_skip_Hinterrupt:
STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
+   STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
STD_EXCEPTION_COMMON(0xe40, emulation_assist, 
emulation_assist_interrupt)
STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception)
 #ifdef CONFIG_PPC_DOORBELL
@@ -794,6 +795,9 @@ kvmppc_skip_Hinterrupt:
STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
 #endif
STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, 
performance_monitor_exception)
+   STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
+   STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
+
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, 
instruction_breakpoint_exception)
STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
 #ifdef CONFIG_ALTIVEC
@@ -801,11 +805,6 @@ kvmppc_skip_Hinterrupt:
 #else
STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
 #endif
-#ifdef CONFIG_CBE_RAS
-   STD_EXCEPTION_COMMON(0x1200, cbe_system_error, 
cbe_system_error_exception)
-   STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
-   STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
-#endif /* CONFIG_CBE_RAS */
 
/*
 * Relocation-on interrupts: A subset of the interrupts can be delivered
@@ -1029,8 +1028,6 @@ instruction_access_common:
li  r5,0x400
b   do_hash_page/* Try to handle as hpte fault */
 
-   STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
-
 /*
  * Here is the common SLB miss user that is used when going to virtual
  * mode for SLB misses, that is currently not used
@@ -1246,9 +1243,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl  vsx_unavailable_exception
b   ret_from_except
 
-   STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
-   STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
-
.align  7
.globl  __end_handlers
 __end_handlers:
@@ -1268,6 +1262,12 @@ fwnmi_data_area:
. = 0x8000
 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
 
+#ifdef CONFIG_CBE_RAS
+   STD_EXCEPT

[PATCH v4 1/3] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-04-07 Thread Hari Bathini
Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors till __end_interrupts marker are copied down to real
address 0x100. So, branching to labels (read OOL handlers) outside this
section should be handled differently (see LOAD_HANDLER()), considering
a relocatable kernel, which would need at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0 (6 instructions) that contains
the part up to the point where the CFAR is saved in the PACA should be
part of the short interrupt vectors before we branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraint. Currently, in these interrupt vectors, we simply branch out to
OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (eg. kdump case). While this has been the case
for some time now and kdump is used widely, we were fortunate not to see any
problems so far, for three reasons:

1. In almost all cases, production kernel (relocatable) is used for
   kdump as well, which would mean that crashed kernel's OOL handler
   would be at the same place where we end up branching to, from the short
   interrupt vector of kdump kernel.
2. Also, OOL handler was unlikely the reason for crash in almost all
   the kdump scenarios, which meant we had a sane OOL handler from
   crashed kernel that we branched to.
3. On most 64-bit POWER server processors, page size is large enough
   that marking interrupt vector code as executable (see commit
   429d2e83) leads to marking OOL handler code from crashed kernel,
   that sits right below interrupt vector code from kdump kernel, as
   executable as well.

Let us fix this undependable code path by moving these OOL handlers below
__end_interrupts marker to make sure we also copy these handlers to real
address 0x100 when running a relocatable kernel. Because the interrupt
vectors branching to these OOL handlers are not long enough to use
LOAD_HANDLER() for branching as discussed above.

This fix has been tested successfully in the kdump scenario, on an LPAR with 4K page
size by using different default/production kernel and kdump kernel.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---

Michael, I did test this patchset in different scenarios. But if you feel
the change is too radical, we could go with version2. But I thought this was
worth a shot.

changes from v3:
1. No changes in this patch except for a spellcheck
2. A new patch that tries to free up space below 0x7000 (2/3)
3. A new patch to remove __end_handlers marker (3/3)


 arch/powerpc/kernel/exceptions-64s.S |   29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..f76b2f3 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -953,6 +953,25 @@ hv_facility_unavailable_relon_trampoline:
 #endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
 
+   /*
+* Out-Of-Line handlers for relocation-on interrupt vectors
+*
+* We need these OOL handlers to be below __end_interrupts
+* marker to ensure we also copy these OOL handlers along
+* with the interrupt vectors to real address 0x100 when
+* running a relocatable kernel. Because the interrupt
+* vectors branching to these OOL handlers are not long
+* enough to use LOAD_HANDLER() for branching.
+*/
+   STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
+   MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
+
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
+   STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+   STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+
/* Other future vectors */
.align  7
.globl  __end_interrupts
@@ -1234,16 +1253,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
.globl  __end_handlers
 __end_handlers:
 
-   /* Equivalents to the above handlers for relocation-on interrupt 
vectors */
-   STD_RELON_EXCEPTION_HV_OOL(0xe40, emul

Re: [PATCH v3] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-04-01 Thread Hari Bathini



On 04/01/2016 04:07 PM, Michael Ellerman wrote:

On Fri, 2016-04-01 at 12:23 +0530, Hari Bathini wrote:

On 04/01/2016 11:44 AM, Michael Ellerman wrote:

On Wed, 2016-03-30 at 23:49 +0530, Hari Bathini wrote:

Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full

...

Let us fix this unreliable code path by moving these OOL handlers below the
__end_interrupts marker, to make sure we also copy these handlers to real
address 0x100 when running a relocatable kernel, because the interrupt
vectors branching to these OOL handlers are not long enough to use
LOAD_HANDLER() for branching, as discussed above.


...

changes from v2:
2. Move the OOL handlers before __end_interrupts marker instead of moving the 
__end_interrupts marker
3. Leave __end_handlers marker as is.

Hi Hari,

Thanks for trying this. In the end I've decided it's not a good option.

If you build an allmodconfig, and turn on CONFIG_RELOCATABLE, and then look at
the disassembly, you see this:

c0006ffc:   48 00 29 04 b   c0009900 
<.ret_from_except>

c0007000 <__end_handlers>:


At 0x7000 we have the FWNMI area, which is fixed and can't move. As you see
above we end up with only 4 bytes of space between the end of the handlers and
the FWNMI area.

So any tiny change that adds two more instructions prior to 0x7000 will then
fail to build.

Hi Michael,

I agree. But the OOL handlers that are moved up in v3 were below
0x7000 earlier as well, and moving them below the __end_interrupts marker
shouldn't make any difference in terms of space consumption, at least in
comparison between v2 & v3. So, I guess picking either v2 or v3
doesn't change this for the better.

It does make a difference, due to alignment. Prior to your patch we have ~24
bytes free.


Hi Michael,

Hmmm.. I thought ~24 bytes was not much of a difference, but with the scenario
you mentioned it does sound critical. Actually, this patch came into being
for want of another 8~12 bytes. So, I should have known better about the
space constraint.




Also, there is code between __end_interrupts and __end_handlers
that is not location dependent as long as it is within 64K (0x10000),
which can be moved above 0x8000 if need be.

That's true, but that sort of change is unlikely to backport well. And we need
to backport this fix to everything.


That does sound like a maintainer's nightmare.


But if you can get that to work I'll consider it. I tried quickly but couldn't
get it working, due to problems with the feature else sections being too far
away from.


Same case. May need some time to get that right.
Also, exploring holes between __start_interrupts & __end_interrupts.
Will try and get back on this soon.
If none of this works, we have v2 anyway.

Thanks
Hari


Re: ppc64/book3s: copy interrupts till __end_handlers marker instead of __end_interrupts

2016-03-29 Thread Hari Bathini



On 03/29/2016 03:47 PM, Michael Ellerman wrote:

Hi Hari,

You win the "Best Change Log of the Year" award.

Some comments below ...

On Mon, 2016-28-03 at 11:23:22 UTC, Hari Bathini wrote:

Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors up to the __end_interrupts marker are copied down to
real address 0x100. So, branching to labels (read: OOL handlers) outside
this section must be handled differently for a relocatable kernel (see
LOAD_HANDLER()), which needs at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0
(6 instructions) that contains the part up to the point where the CFAR is
saved in the PACA should be part of the short interrupt vectors before we
branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraints. Currently, in these interrupt vectors, we simply branch out to
OOL handlers without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (e.g. the kdump case). While this has been the
case for some time now and kdump is widely used, we were fortunate not to
see any problems so far, for three reasons:

 1. In almost all cases, the production kernel (relocatable) is used for
    kdump as well, which means the crashed kernel's OOL handler is at the
    same place we end up branching to from the short interrupt vector of
    the kdump kernel.
 2. Also, the OOL handler was unlikely to be the reason for the crash in
    almost all kdump scenarios, which meant we had a sane OOL handler from
    the crashed kernel that we branched to.
 3. On most 64-bit POWER server processors, the page size is large enough
    that marking interrupt vector code as executable (see commit
    429d2e83) also marks the OOL handler code from the crashed kernel,
    which sits right below the interrupt vector code of the kdump kernel,
    as executable.

Let us fix this unreliable code path: firstly, by moving the __end_handlers
marker down past the OOL handlers; secondly, by copying interrupt vectors
down to the __end_handlers marker instead of __end_interrupts when running a
relocatable kernel, to make sure we end up in the relocated (kdump) kernel's
OOL handler instead of the crashed kernel's; and thirdly, by marking all the
interrupt vector code that is copied down to real address 0x100 as
executable, considering the relocation-on-exception feature that allows
exceptions to be raised in virtual mode (IR=DR=1).

This fix has been tested successfully in the kdump scenario, on an LPAR with
4K page size, using different default/production and kdump kernels.

So I think you've missed one important case.


My bad! I missed out on considering this case..


In do_final_fixups() we recopy the (now patched) kernel code down to zero. That
code uses __end_interrupts as its limit, so I think if you look closely your OOL
handlers down at zero will not have had feature fixups applied to them.

I think perhaps the better fix is just to move __end_interrupts down (up) to the
right location. AFAICS all users of __end_interrupts actually want that address.

It would also mean we could remove __end_handlers as unused.


True. This sounds less complicated.


So can you please check that I'm right about do_final_fixups(), and then try
moving __end_interrupts and check that works?


Yeah. Testing the patch. Will post it soon.
Thanks for the review!

- Hari


Re: [v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-30 Thread Hari Bathini



On 03/30/2016 12:44 PM, Hari Bathini wrote:



On 03/30/2016 05:55 AM, Michael Ellerman wrote:

On Tue, 2016-29-03 at 18:34:37 UTC, Hari Bathini wrote:
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S

index 7716ceb..e598580 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,8 +764,8 @@ kvmppc_skip_Hinterrupt:
  #endif
/*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above.  Because the prologs assemble the
+ * Code from here down to end of out of line handlers is invoked from
+ * the exception prologs above.  Because the prologs assemble the
I think it would be better to just replace __end_handlers with
__end_interrupts, that way it's entirely clear what location you're
talking about.


@@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
  #endif
  STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
  -/* Other future vectors */
-   .align  7
-   .globl  __end_interrupts
-__end_interrupts:
-
  .align  7
  system_call_entry:
  b   system_call_common
@@ -1230,10 +1225,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
  STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
  STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)

  -   .align  7
-   .globl  __end_handlers
-__end_handlers:
-
Sorry I wasn't clear in my last mail, please do this as a separate 
cleanup patch

after this patch.


ok..


@@ -1244,6 +1235,16 @@ __end_handlers:
  STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
  STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
   +   /* FIXME: For now, let us move the __end_interrupts marker down past

Why is it FIXME?

In general I don't want to merge code that adds a FIXME unless there 
is some

very good reason.

AFAICS this is a permanent solution isn't it?


Except for a few short interrupt vectors like 0x4f00, 0x4f20, etc., all other
vectors defined up to the __end_interrupts marker ensure that LOAD_HANDLER()
is used for branching to labels like system_call_entry, data_access_common,
etc. that are currently not copied to real 0 in the relocation case.

So, we are forced to move the __end_interrupts marker down only to handle
the space constraint in the short vectors. So, I added the FIXME as a
reminder of the scope for improvement in the code. But after thinking it
over again now, moving the marker down makes us copy an additional 1~2 KB
along with the 21~22 KB that we are copying already. So, not much of an
improvement to lose sleep over or to add a FIXME, I guess. Your thoughts?



Alternatively, how about moving the OOL handlers that can't be branched to
with LOAD_HANDLER() under __end_interrupts? This way we won't be copying
more than the few absolutely needed handlers.


STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
.
.
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)


We can leave the __end_handlers marker to indicate code that should be part
of the first 64K of the kernel image.

Thanks
Hari


Also, the FIXME is the reason why I did not replace __end_handlers with
__end_interrupts in the comment earlier.

+ * the out-of-line handlers, to make sure we also copy OOL handlers
+ * to real address 0x100 when running a relocatable kernel. This helps

It doesn't "help" it's 100% required.


Yep. Will change the wording.
Thanks for the review!

- Hari

+ * in cases where interrupt vectors are not long enough (like 0x4f00,
+ * 0x4f20, etc.) to branch out to OOL handlers with LOAD_HANDLER().

+ */
+   .align  7
+   .globl  __end_interrupts
+__end_interrupts:
+
  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
  /*
   * Data area reserved for FWNMI option.


cheers

[PATCH v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-29 Thread Hari Bathini
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors up to the __end_interrupts marker are copied down to
real address 0x100. So, branching to labels (read: OOL handlers) outside
this section must be handled differently for a relocatable kernel (see
LOAD_HANDLER()), which needs at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0 (6 instructions) that contains
the part up to the point where the CFAR is saved in the PACA should be
part of the short interrupt vectors before we branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraints. Currently, in these interrupt vectors, we simply branch out to
OOL handlers without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (e.g. the kdump case). While this has been the
case for some time now and kdump is widely used, we were fortunate not to
see any problems so far, for three reasons:

1. In almost all cases, the production kernel (relocatable) is used for
   kdump as well, which means the crashed kernel's OOL handler is at the
   same place we end up branching to from the short interrupt vector of
   the kdump kernel.
2. Also, the OOL handler was unlikely to be the reason for the crash in
   almost all kdump scenarios, which meant we had a sane OOL handler from
   the crashed kernel that we branched to.
3. On most 64-bit POWER server processors, the page size is large enough
   that marking interrupt vector code as executable (see commit
   429d2e83) also marks the OOL handler code from the crashed kernel,
   which sits right below the interrupt vector code of the kdump kernel,
   as executable.

Let us fix this unreliable code path by moving the __end_interrupts marker
down past the OOL handlers, to make sure that we also copy the OOL handlers
to real address 0x100 when running a relocatable kernel. This helps in the
cases discussed above, where interrupt vectors are not long enough to branch
out to OOL handlers with LOAD_HANDLER(). While we are here, let us also
remove the virtually insignificant __end_handlers marker.

This fix has been tested successfully in the kdump scenario, on an LPAR with
4K page size, using different default/production and kdump kernels.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---

changes from v1:
1. Changed the subject from "copy interrupts till __end_handlers marker
   instead of __end_interrupts" to a more generic one
2. Used __end_interrupts marker instead of __end_handlers to make the fix
   less complicated.
3. Removed unused __end_handlers marker.


 arch/powerpc/kernel/exceptions-64s.S |   23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..e598580 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,8 +764,8 @@ kvmppc_skip_Hinterrupt:
 #endif
 
 /*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above.  Because the prologs assemble the
+ * Code from here down to end of out of line handlers is invoked from
+ * the exception prologs above.  Because the prologs assemble the
  * addresses of these handlers using the LOAD_HANDLER macro,
  * which uses an ori instruction, these handlers must be in
  * the first 64k of the kernel image.
@@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
 #endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
 
-   /* Other future vectors */
-   .align  7
-   .globl  __end_interrupts
-__end_interrupts:
-
.align  7
 system_call_entry:
b   system_call_common
@@ -1230,10 +1225,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
 
-   .align  7
-   .globl  __end_handlers
-__end_handlers:
-
/* Equivalents to the above handlers for relocation-on interrupt 
vectors */
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
@@ -1244,6 +1235,16 @@ __end_handlers:
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60

Re: [PATCH v3] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-04-01 Thread Hari Bathini



On 04/01/2016 11:44 AM, Michael Ellerman wrote:

On Wed, 2016-03-30 at 23:49 +0530, Hari Bathini wrote:

Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full

...

Let us fix this unreliable code path by moving these OOL handlers below the
__end_interrupts marker, to make sure we also copy these handlers to real
address 0x100 when running a relocatable kernel, because the interrupt
vectors branching to these OOL handlers are not long enough to use
LOAD_HANDLER() for branching, as discussed above.


...

changes from v2:
2. Move the OOL handlers before __end_interrupts marker instead of moving the 
__end_interrupts marker
3. Leave __end_handlers marker as is.

Hi Hari,

Thanks for trying this. In the end I've decided it's not a good option.

If you build an allmodconfig, and turn on CONFIG_RELOCATABLE, and then look at
the disassembly, you see this:

   c0006ffc:   48 00 29 04 b   c0009900 
<.ret_from_except>
   
   c0007000 <__end_handlers>:


At 0x7000 we have the FWNMI area, which is fixed and can't move. As you see
above we end up with only 4 bytes of space between the end of the handlers and
the FWNMI area.

So any tiny change that adds two more instructions prior to 0x7000 will then
fail to build.


Hi Michael,

I agree. But the OOL handlers that are moved up in v3 were below
0x7000 earlier as well, and moving them below the __end_interrupts marker
shouldn't make any difference in terms of space consumption, at least in
comparison between v2 & v3. So, I guess picking either v2 or v3
doesn't change this for the better.

Also, there is code between __end_interrupts and __end_handlers
that is not location dependent as long as it is within 64K (0x10000),
which can be moved above 0x8000 if need be.

For these reasons, I feel v3 is better going forward as it keeps
__start_interrupts to __end_interrupts code compact and
leaves alone the code that doesn't need to be copied to real 0.

Am I missing something here?

Thanks
Hari


None of that's your fault, it's just the nature of the code in there, it's very
space constrained.

For now I'll take your v2, but I'll edit the comment and drop the removal of
__end_handlers.

cheers




[PATCH] ppc64/book3s: copy interrupts till __end_handlers marker instead of __end_interrupts

2016-03-28 Thread Hari Bathini
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors up to the __end_interrupts marker are copied down to
real address 0x100. So, branching to labels (read: OOL handlers) outside
this section must be handled differently for a relocatable kernel (see
LOAD_HANDLER()), which needs at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0
(6 instructions) that contains the part up to the point where the CFAR is
saved in the PACA should be part of the short interrupt vectors before we
branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraints. Currently, in these interrupt vectors, we simply branch out to
OOL handlers without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (e.g. the kdump case). While this has been the
case for some time now and kdump is widely used, we were fortunate not to
see any problems so far, for three reasons:

1. In almost all cases, the production kernel (relocatable) is used for
   kdump as well, which means the crashed kernel's OOL handler is at the
   same place we end up branching to from the short interrupt vector of
   the kdump kernel.
2. Also, the OOL handler was unlikely to be the reason for the crash in
   almost all kdump scenarios, which meant we had a sane OOL handler from
   the crashed kernel that we branched to.
3. On most 64-bit POWER server processors, the page size is large enough
   that marking interrupt vector code as executable (see commit
   429d2e83) also marks the OOL handler code from the crashed kernel,
   which sits right below the interrupt vector code of the kdump kernel,
   as executable.

Let us fix this unreliable code path: firstly, by moving the __end_handlers
marker down past the OOL handlers; secondly, by copying interrupt vectors
down to the __end_handlers marker instead of __end_interrupts when running a
relocatable kernel, to make sure we end up in the relocated (kdump) kernel's
OOL handler instead of the crashed kernel's; and thirdly, by marking all the
interrupt vector code that is copied down to real address 0x100 as
executable, considering the relocation-on-exception feature that allows
exceptions to be raised in virtual mode (IR=DR=1).

This fix has been tested successfully in the kdump scenario, on an LPAR with
4K page size, using different default/production and kdump kernels.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/sections.h  |3 ++-
 arch/powerpc/kernel/exceptions-64s.S |8 
 arch/powerpc/kernel/head_64.S|2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index abf5866..b4139a5 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -10,6 +10,7 @@
 
 extern char __start_interrupts[];
 extern char __end_interrupts[];
+extern char __end_handlers[];
 
 extern char __prom_init_toc_start[];
 extern char __prom_init_toc_end[];
@@ -39,7 +40,7 @@ static inline int overlaps_interrupt_vector_text(unsigned 
long start,
 {
unsigned long real_start, real_end;
real_start = __start_interrupts - _stext;
-   real_end = __end_interrupts - _stext;
+   real_end = __end_handlers - _stext;
 
return start < (unsigned long)__va(real_end) &&
(unsigned long)__va(real_start) < end;
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..98e2ce5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1230,10 +1230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
 
-   .align  7
-   .globl  __end_handlers
-__end_handlers:
-
/* Equivalents to the above handlers for relocation-on interrupt 
vectors */
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
@@ -1244,6 +1240,10 @@ __end_handlers:
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
 
+ 

Re: [v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-30 Thread Hari Bathini



On 03/30/2016 05:55 AM, Michael Ellerman wrote:

On Tue, 2016-29-03 at 18:34:37 UTC, Hari Bathini wrote:

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..e598580 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,8 +764,8 @@ kvmppc_skip_Hinterrupt:
  #endif
  
  /*

- * Code from here down to __end_handlers is invoked from the
- * exception prologs above.  Because the prologs assemble the
+ * Code from here down to end of out of line handlers is invoked from
+ * the exception prologs above.  Because the prologs assemble the

I think it would be better to just replace __end_handlers with __end_interrupts,
that way it's entirely clear what location you're talking about.


@@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
  #endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
  
-	/* Other future vectors */

-   .align  7
-   .globl  __end_interrupts
-__end_interrupts:
-
.align  7
  system_call_entry:
b   system_call_common
@@ -1230,10 +1225,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
  
-	.align	7

-   .globl  __end_handlers
-__end_handlers:
-

Sorry I wasn't clear in my last mail, please do this as a separate cleanup patch
after this patch.


ok..


@@ -1244,6 +1235,16 @@ __end_handlers:
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
  
+	/* FIXME: For now, let us move the __end_interrupts marker down past

Why is it FIXME?

In general I don't want to merge code that adds a FIXME unless there is some
very good reason.

AFAICS this is a permanent solution isn't it?


Except for a few short interrupt vectors like 0x4f00, 0x4f20, etc., all other
vectors defined up to the __end_interrupts marker ensure that LOAD_HANDLER()
is used for branching to labels like system_call_entry, data_access_common,
etc. that are currently not copied to real 0 in the relocation case.

So, we are forced to move the __end_interrupts marker down only to handle
the space constraint in the short vectors. So, I added the FIXME as a
reminder of the scope for improvement in the code. But after thinking it
over again now, moving the marker down makes us copy an additional 1~2 KB
along with the 21~22 KB that we are copying already. So, not much of an
improvement to lose sleep over or to add a FIXME, I guess. Your thoughts?

Also, the FIXME is the reason why I did not replace __end_handlers with
__end_interrupts in the comment earlier.


+* the out-of-line handlers, to make sure we also copy OOL handlers
+* to real address 0x100 when running a relocatable kernel. This helps

It doesn't "help" it's 100% required.


Yep. Will change the wording.
Thanks for the review!

- Hari


+* in cases where interrupt vectors are not long enough (like 0x4f00,
+* 0x4f20, etc.) to branch out to OOL handlers with LOAD_HANDLER().
+*/
+   .align  7
+   .globl  __end_interrupts
+__end_interrupts:
+
  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
  /*
   * Data area reserved for FWNMI option.


cheers

[PATCH v2 2/2] powerpc/fadump: add support to parse size based on memory range

2016-05-12 Thread Hari Bathini
Currently, memory for fadump can be specified with fadump_reserve_mem=size,
where only a fixed size can be specified. Add the below syntax as well, to
support conditional reservation based on system memory size:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

This syntax helps in using the same command-line parameter for different
system memory sizes.
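
For instance, with the range-based syntax a single command line can cover
small and large systems alike (the ranges and sizes below are illustrative
values only, not recommendations from this patch):

    fadump_reserve_mem=4G-64G:512M,64G-128G:1G,128G-:2G

On a 96 GB system the 64G-128G entry matches and 1 GB is reserved, while on
a 256 GB system the open-ended 128G- entry applies and 2 GB is reserved; the
match is made against memblock_phys_mem_size(), as in the parsing code below.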

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
Changes from v1:
1. Changed subject from "powerpc/fadump: add support to specify memory range 
based size"
2. Reused crashkernel parsing code that was moved to kernel/params.c (see patch 
1/2)

 arch/powerpc/kernel/fadump.c |   64 --
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index d0af58b..a868281 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,56 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
 }
 
+/*
+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *name = "fadump_reserve_mem=";
+   char *fadump_cmdline = NULL, *cur;
+
+   fw_dump.reserve_bootvar = 0;
+
+   /* find fadump_reserve_mem and use the last one if there are many */
+   cur = strstr(boot_command_line, name);
+   while (cur) {
+   fadump_cmdline = cur;
+   cur = strstr(cur+1, name);
+   }
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   fadump_cmdline += strlen(name);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!is_param_range_based(fadump_cmdline)) {
+   fw_dump.reserve_bootvar = memparse(fadump_cmdline, NULL);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   cur = fadump_cmdline;
+   fw_dump.reserve_bootvar = parse_mem_range_size("fadump_reserve_mem",
+   &cur, memblock_phys_mem_size());
+   if (cur == fadump_cmdline) {
+   printk(KERN_INFO "fadump_reserve_mem: Invalid syntax!\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -212,12 +262,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
 {
unsigned long size;
 
+   /* sets fw_dump.reserve_bootvar */
+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -352,15 +407,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;


[PATCH 1/2] Refactor code parsing size based on memory range

2016-05-12 Thread Hari Bathini
Currently, the crashkernel parameter supports the below syntax to parse size
based on memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for the crashkernel parameter, it is also
applicable to other parameters with similar syntax. So, move this code to a
more generic place for code reuse.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
While this patch in itself has nothing to do with powerpc,
the powerpc patch (2/2) depends on this patch..

 include/linux/kernel.h |5 +++
 kernel/kexec_core.c|   63 +++-
 kernel/params.c|   96 
 3 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2f7775e..e755ed1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -429,6 +429,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
 extern bool parse_option_str(const char *str, const char *option);
 
+extern bool __init is_param_range_based(const char *cmdline);
+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 1391d3e..71e92b2 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1084,59 +1084,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
 
/* for each entry of the comma-separated list */
-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-   start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-   end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+   *crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
 
if (*crash_size > 0) {
while (*cur && *cur != ' ' && *cur != '@')
@@ -1273,7 +1223,6 @@ static int __init __parse_crashkernel(char *cmdline,
 const char *name,
 const char *suffix)
 {
-   char*first_colon, *first_space;
char*ck_cmdline;
 
BUG_ON(!crash_size || !crash_base);
@@ -1291,12 +1240,10 @@ static int __init __parse_crashkernel(char *cmdline,
return parse_crashkernel_suffix(ck_cmdline, crash_size,
suffix);
/*
-* if the commandline contains a ':', then that's the extended
+* if the parameter is range based, then that's the extended
 * syntax -- if not, it must be the classic syntax
 */
-   first_colon = strchr(ck_cmdline, ':');
-   first_space = strchr(ck_cmdline, ' ');
-   if (first_colon && (!first_space || first

[PATCH 2/3] powerpc/fadump: add support to specify memory range based size

2016-05-06 Thread Hari Bathini
Currently, memory for fadump can be specified with fadump_reserve_mem=size,
where only a fixed size can be specified. This patch tries to extend this
syntax to support conditional reservation based on memory size, with the
below syntax:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

This syntax helps in using the same command-line parameter for different
system memory sizes.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump.c |  127 +++---
 1 file changed, 118 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index d0af58b..a7fef3e 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,121 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
 }
 
+#define FADUMP_MEM_CMDLINE_PREFIX  "fadump_reserve_mem="
+
+static __init char *get_last_fadump_reserve_mem(void)
+{
+   char *p = boot_command_line, *fadump_cmdline = NULL;
+
+   /* find fadump_reserve_mem and use the last one if there are more */
+   p = strstr(p, FADUMP_MEM_CMDLINE_PREFIX);
+   while (p) {
+   fadump_cmdline = p;
+   p = strstr(p+1, FADUMP_MEM_CMDLINE_PREFIX);
+   }
+
+   return fadump_cmdline;
+}
+
+#define parse_fadump_print(fmt, arg...) \
+   printk(KERN_INFO "fadump_reserve_mem: " fmt, ##arg)
+
+/*
+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *cur, *tmp;
+   char *first_colon, *first_space;
+   char *fadump_cmdline;
+   unsigned long long system_ram;
+
+   fw_dump.reserve_bootvar = 0;
+   fadump_cmdline = get_last_fadump_reserve_mem();
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   first_colon = strchr(fadump_cmdline, ':');
+   first_space = strchr(fadump_cmdline, ' ');
+   cur = fadump_cmdline + strlen(FADUMP_MEM_CMDLINE_PREFIX);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!first_colon || (first_space && (first_colon > first_space))) {
+   fw_dump.reserve_bootvar = memparse(cur, &tmp);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   system_ram = memblock_phys_mem_size();
+   /* for each entry of the comma-separated list */
+   do {
+   unsigned long long start, end = ULLONG_MAX, size;
+
+   /* get the start of the range */
+   start = memparse(cur, &tmp);
+   if (cur == tmp) {
+   parse_fadump_print("Memory value expected\n");
+   return -EINVAL;
+   }
+   cur = tmp;
+   if (*cur != '-') {
+   parse_fadump_print("'-' expected\n");
+   return -EINVAL;
+   }
+   cur++;
+
+   /* if no ':' is here, than we read the end */
+   if (*cur != ':') {
+   end = memparse(cur, &tmp);
+   if (cur == tmp) {
+   parse_fadump_print("Memory value expected\n");
+   return -EINVAL;
+   }
+   cur = tmp;
+   if (end <= start) {
+   parse_fadump_print("end <= start\n");
+   return -EINVAL;
+   }
+   }
+
+   if (*cur != ':') {
+   parse_fadump_print("':' expected\n");
+   return -EINVAL;
+   }
+   cur++;
+
+   size = memparse(cur, &tmp);
+   if (cur == tmp) {
+   parse_fadump_print("Memory value expected\n");
+   return -EINVAL;
+   }
+   cur = tmp;
+   if (size >= system_ram) {
+   parse_fadump_print("invalid size\n");
+   return -EINVAL;
+   }
+
+   /* match ? */
+   if (system_ram >= start && system_ram < end) {
+   fw_dump.reserve_bootvar = size;
+   break;
+   }
+   } while (*cur++ == ',');
+
+   return 0;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -212,6 +327,9 @@ static 

[PATCH 1/3] powerpc/fadump: set an upper limit for the default memory reserved for fadump

2016-05-06 Thread Hari Bathini
When boot memory size for fadump is not specified, memory is reserved
for fadump based on system RAM size. As the system RAM size increases,
the memory reserved for fadump increases as well. This patch sets an
upper limit on the memory reserved for fadump, to avoid reserving
excess memory.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |6 ++
 arch/powerpc/kernel/fadump.c  |4 
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index b4407d0..2c3cb32 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -43,6 +43,12 @@
 #define MIN_BOOT_MEM   (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
 
+/*
+ * Maximum memory needed for fadump to boot up successfully. Use this as
+ * an upper limit for fadump so we don't end up reserving excess memory.
+ */
+#define MAX_BOOT_MEM   (0x1UL << 32)
+
 #define memblock_num_regions(memblock_type)(memblock.memblock_type.cnt)
 
 #ifndef ELF_CORE_EFLAGS
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3cb3b02a..d0af58b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -225,6 +225,10 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
/* round it down in multiples of 256 */
size = size & ~0x0FFFUL;
 
+   /* Set an upper limit on the memory to be reserved */
+   if (size > MAX_BOOT_MEM)
+   size = MAX_BOOT_MEM;
+
/* Truncate to memory_limit. We don't want to over reserve the memory.*/
if (memory_limit && size > memory_limit)
size = memory_limit;


[PATCH 3/3] powerpc/fadump: add support for fadump_nr_cpus= parameter

2016-05-06 Thread Hari Bathini
Kernel parameter 'nr_cpus' can be used to limit the maximum number
of processors that an SMP kernel could support. This patch extends
this to fadump by introducing the 'fadump_nr_cpus' parameter, which can
help in booting the fadump kernel with a lower memory footprint.
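
For example (illustrative values only), since fadump boots the same kernel
with the same command line for both the regular boot and the capture boot,
the new parameter can simply sit next to the existing fadump options:

    fadump=on fadump_nr_cpus=1

As the parameter is acted upon only when is_fadump_active() reports a fadump
capture boot, it is a no-op during the regular boot and caps nr_cpu_ids only
in the capture kernel.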

Suggested-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a7fef3e..c75783c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -470,6 +470,28 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
+/* Look for fadump_nr_cpus= cmdline option. */
+static int __init early_fadump_nrcpus(char *p)
+{
+   int nr_cpus;
+
+   /*
+* fadump_nr_cpus parameter is only applicable on a
+* fadump active kernel. This is to reduce memory
+* needed to boot a fadump active kernel.
+* So, check if we are booting after crash.
+*/
+   if (!is_fadump_active())
+   return 0;
+
+   get_option(&p, &nr_cpus);
+   if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+   nr_cpu_ids = nr_cpus;
+
+   return 0;
+}
+early_param("fadump_nr_cpus", early_fadump_nrcpus);
+
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;


Re: [v4, 2/3] ppc64/book3s: make some room for common interrupt vector code

2016-04-17 Thread Hari Bathini



On 04/15/2016 06:29 PM, Michael Ellerman wrote:

On Fri, 2016-04-15 at 21:06 +1000, Michael Ellerman wrote:

Hi Hari,

Thanks for persisting with this.

On Thu, 2016-07-04 at 21:58:50 UTC, Hari Bathini wrote:

With the previous patch, we choke out whatever little space is left
below 0x7000 (FWNMI hard block) while there is a hole of ~1400 bytes
below __end_interrupts marker when CONFIG_CBE_RAS is disabled.
Considering CONFIG_CBE_RAS is not enabled by default for BOOK3S, this
is not a desirable scenario especially when we have to worry about
each additional instruction that goes below 0x7000.

Memory region from 0x1800 to 0x4000 is dedicated for common interrupt
vector code. Also, we never hit an interrupt below 0x300 when IR=DR=1
implying memory region between 0x4000 to 0x4300 can also be used for
common interrupt vector code. So, we can effectively use memory region
between 0x1800 to 0x4300 for common interrupt vector code.

On Power9 the system-call-vectored instruction will use the region at 0x3000, so
moving code into that space is not a good long term plan.

I'll take your v2 and put it in next next week.

I'll add this fixes line, which I think is correct:

Fixes: c1fb6816fb1b ("powerpc: Add relocation on exception vector handlers")


Yeah. Thanks!


cheers


Re: [3/3] powerpc/fadump: add support for fadump_nr_cpus= parameter

2016-05-09 Thread Hari Bathini



On 05/07/2016 09:42 AM, Michael Ellerman wrote:

On Fri, 2016-06-05 at 11:51:08 UTC, Hari Bathini wrote:

Kernel parameter 'nr_cpus' can be used to limit the maximum number
of processors that an SMP kernel could support. This patch extends
this to fadump by introducing 'fadump_nr_cpus' parameter that can
help in booting fadump kernel on a lower memory footprint.

Is there really no other way to do this? I really hate adding new, single use
only command line parameters.


Hmmm.. the only alternative I can think of is enforcing a certain nr_cpu_ids
value whenever fadump is active, but that doesn't sound right.
Any suggestions?

Thanks
Hari


cheers

Re: [2/3] powerpc/fadump: add support to specify memory range based size

2016-05-09 Thread Hari Bathini



On 05/07/2016 09:41 AM, Michael Ellerman wrote:

On Fri, 2016-06-05 at 11:50:37 UTC, Hari Bathini wrote:

Currently, memory for fadump can be specified with fadump_reserve_mem=size,
where only a fixed size can be specified. This patch tries to extend this
syntax to support conditional reservation based on memory size, with the
below syntax:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

This syntax helps using the same commandline parameter for different system
memory sizes.

This is basically using the crashkernel= syntax right?


Yep. One of the typical crashkernel syntaxes.


So can we please reuse the crashkernel= parsing code?


but crashkernel has a few other variants which don't make sense
for fadump. To reuse the crashkernel parsing code for fadump,
it needs a little bit of refactoring. Will try to do that and respin.


cheers

[RESEND][PATCH v2 1/2] kexec: refactor code parsing size based on memory range

2016-08-03 Thread Hari Bathini
The crashkernel parameter supports different syntaxes to specify the amount
of memory to be reserved for the kdump kernel. Below is one of the supported
syntaxes that needs parsing to find the memory size to reserve, based on
memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for the crashkernel parameter, it applies
to other parameters, like fadump_reserve_mem=, which could use similar
syntax. This patch moves crashkernel's parsing code for the above syntax to
kernel/params.c for reuse. Two functions, is_param_range_based() and
parse_mem_range_size(), are added to kernel/params.c for this purpose.

Any parameter that uses the above syntax can use is_param_range_based() to
validate the syntax and parse_mem_range_size() to get the parsed memory
size. While some code is moved to kernel/params.c, there is no functional
change in how the crashkernel parameter is parsed.
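
As a quick illustration of the intended reuse, a hypothetical "mydump_mem="
early parameter could be wired up as below (a minimal sketch only: the
parameter name, the reserve_size variable and the error handling are made up
for this example and are not part of the patch):

    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/memblock.h>

    static unsigned long long reserve_size;

    static int __init parse_mydump_mem(char *p)
    {
            char *cur = p;

            if (!p)
                    return -EINVAL;

            /* plain "mydump_mem=size" syntax */
            if (!is_param_range_based(p)) {
                    reserve_size = memparse(p, NULL);
                    return 0;
            }

            /* "mydump_mem=<range>:<size>[,...]" syntax */
            reserve_size = parse_mem_range_size("mydump_mem", &cur,
                                                memblock_phys_mem_size());
            if (cur == p)   /* nothing was parsed: invalid syntax */
                    return -EINVAL;

            return 0;
    }
    early_param("mydump_mem", parse_mydump_mem);

This mirrors how patch 2/2 uses the two helpers for fadump_reserve_mem=.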

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
1. Updated changelog

 include/linux/kernel.h |5 +++
 kernel/kexec_core.c|   63 +++-
 kernel/params.c|   96 
 3 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..2df7ba2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -435,6 +435,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
 extern bool parse_option_str(const char *str, const char *option);
 
+extern bool __init is_param_range_based(const char *cmdline);
+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5616755..3a74024 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1104,59 +1104,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
 
/* for each entry of the comma-separated list */
-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-   start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-   end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+   *crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
 
if (*crash_size > 0) {
while (*cur && *cur != ' ' && *cur != '@')
@@ -1293,7 +1243,6 @@ static int __init __parse_crashkernel(char *cmdline,
 const char *name,
 const char *suffix)
 {
-   char*first_colon, *first_space;
char*ck_cmdline;
 
BUG_ON(!crash_size || !crash_base);
@@ -1311,12 +1260,1

[RESEND][PATCH v2 0/2] powerpc/fadump: support memory range syntax for fadump memory reservation

2016-08-03 Thread Hari Bathini
This patchset adds support for specifying the memory size to reserve for
fadump based on system memory ranges. The crashkernel parameter already
supports such syntax. The first patch refactors the crashkernel parsing code
for reuse. The second patch uses the newly refactored parsing code to reserve
memory for fadump based on system memory size.

---

Hari Bathini (2):
  kexec: refactor code parsing size based on memory range
  powerpc/fadump: parse fadump reserve memory size based on memory range


 arch/powerpc/kernel/fadump.c |   64 
 include/linux/kernel.h   |5 ++
 kernel/kexec_core.c  |   63 ++--
 kernel/params.c  |   96 ++
 4 files changed, 161 insertions(+), 67 deletions(-)


[RESEND][PATCH v2 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-03 Thread Hari Bathini
Currently, memory for fadump can be specified with fadump_reserve_mem=size,
where only a fixed size can be specified. Add the below syntax as well, to
support conditional reservation based on system memory size:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

This syntax helps in using the same command-line parameter for different
system memory sizes.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mah...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   64 --
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b3a6633..4661ae6 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,56 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
 }
 
+/*
+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *name = "fadump_reserve_mem=";
+   char *fadump_cmdline = NULL, *cur;
+
+   fw_dump.reserve_bootvar = 0;
+
+   /* find fadump_reserve_mem and use the last one if there are many */
+   cur = strstr(boot_command_line, name);
+   while (cur) {
+   fadump_cmdline = cur;
+   cur = strstr(cur+1, name);
+   }
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   fadump_cmdline += strlen(name);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!is_param_range_based(fadump_cmdline)) {
+   fw_dump.reserve_bootvar = memparse(fadump_cmdline, NULL);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   cur = fadump_cmdline;
+   fw_dump.reserve_bootvar = parse_mem_range_size("fadump_reserve_mem",
+   &cur, memblock_phys_mem_size());
+   if (cur == fadump_cmdline) {
+   printk(KERN_INFO "fadump_reserve_mem: Invalid syntax!\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -212,12 +262,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
 {
unsigned long size;
 
+   /* sets fw_dump.reserve_bootvar */
+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -348,15 +403,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;


[PATCH v3 1/2] kexec: refactor code parsing size based on memory range

2016-08-10 Thread Hari Bathini
The crashkernel parameter supports different syntaxes to specify the amount
of memory to be reserved for the kdump kernel. Below is one of the supported
syntaxes that needs parsing to find the memory size to reserve, based on
memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for the crashkernel parameter, it applies
to other parameters, like fadump_reserve_mem=, which could use similar
syntax. This patch moves crashkernel's parsing code for the above syntax to
lib/cmdline.c for reuse. Two functions, is_colon_in_param() and
parse_mem_range_size(), are added to lib/cmdline.c for this purpose.

Any parameter that uses the above syntax can use is_colon_in_param() to
validate the syntax and parse_mem_range_size() to get the parsed memory
size. While some code is moved to lib/cmdline.c, there is no functional
change in how the crashkernel parameter is parsed.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v2:
1. Moved the code to lib/cmdline.c instead of kernel/params.c


 include/linux/kernel.h |5 ++
 kernel/kexec_core.c|   63 ++---
 lib/cmdline.c  |  104 
 3 files changed, 114 insertions(+), 58 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..39ff869 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -435,6 +435,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
 extern bool parse_option_str(const char *str, const char *option);
 
+extern bool __init is_colon_in_param(const char *cmdline);
+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5616755..152c4c1 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1104,59 +1104,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
 
/* for each entry of the comma-separated list */
-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-   start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-   end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+   *crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
 
if (*crash_size > 0) {
while (*cur && *cur != ' ' && *cur != '@')
@@ -1293,7 +1243,6 @@ static int __init __parse_crashkernel(char *cmdline,
 const char *name,
 const char *suffix)
 {
-   char*first_colon, *first_space;
char*ck_cmdline;
 
	BUG_ON(!crash_size || !crash_base);

[PATCH v3 0/2] powerpc/fadump: support memory range syntax for fadump memory reservation

2016-08-10 Thread Hari Bathini
This patchset adds support for specifying the fadump reservation size based
on the system memory range. The crashkernel parameter already supports such
syntax. The first patch refactors crashkernel's parsing code for reuse. The
second patch uses the refactored parsing code to reserve memory for fadump
based on the system memory size.

---

Hari Bathini (2):
  kexec: refactor code parsing size based on memory range
  powerpc/fadump: parse fadump reserve memory size based on memory range


 arch/powerpc/kernel/fadump.c |   63 ++---
 include/linux/kernel.h   |5 ++
 kernel/kexec_core.c  |   63 ++---
 lib/cmdline.c|  104 ++
 4 files changed, 168 insertions(+), 67 deletions(-)



[PATCH v3 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-10 Thread Hari Bathini
When fadump is enabled, by default 5% of system RAM is reserved for
fadump kernel. While that works for most cases, it is not good enough
for every case.

Currently, to override the default value, fadump supports specifying
memory to reserve with fadump_reserve_mem=size, where only a fixed size
can be specified. This patch adds support to specify memory size to
reserve for different memory ranges as below:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

Supporting range-based input for the "fadump_reserve_mem" parameter allows
the same command line to work across different system memory sizes.
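
For example, a distro could ship a single command line like the one below
(the range boundaries and sizes here are purely illustrative, not
recommendations):

    fadump_reserve_mem=4G-64G:1G,64G-128G:2G,128G-:4G

With this, a system whose RAM falls in the 4G-64G range reserves 1G, one in
the 64G-128G range reserves 2G, anything larger reserves 4G, and a system
below 4G falls back to the default 5% calculation.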

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mah...@linux.vnet.ibm.com>
---

Changes from v2:
1. Updated changelog


 arch/powerpc/kernel/fadump.c |   63 --
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b3a6633..7c01b5b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,55 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
 }
 
+/*
+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *name = "fadump_reserve_mem=";
+   char *fadump_cmdline = NULL, *cur;
+
+   fw_dump.reserve_bootvar = 0;
+
+   /* find fadump_reserve_mem and use the last one if there are many */
+   cur = strstr(boot_command_line, name);
+   while (cur) {
+   fadump_cmdline = cur;
+   cur = strstr(cur+1, name);
+   }
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   fadump_cmdline += strlen(name);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!is_colon_in_param(fadump_cmdline)) {
+   fw_dump.reserve_bootvar = memparse(fadump_cmdline, NULL);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   cur = fadump_cmdline;
+   fw_dump.reserve_bootvar = parse_mem_range_size("fadump_reserve_mem",
+   &cur, memblock_phys_mem_size());
+   if (cur == fadump_cmdline) {
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -212,12 +261,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
 {
unsigned long size;
 
+   /* sets fw_dump.reserve_bootvar */
+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -348,15 +402,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;



Re: [v2,1/2] refactor code parsing size based on memory range

2016-07-19 Thread Hari Bathini


Ping..


On Friday 24 June 2016 10:45 PM, Hari Bathini wrote:



On 06/24/2016 10:56 AM, Michael Ellerman wrote:

On Wed, 2016-22-06 at 19:25:26 UTC, Hari Bathini wrote:
Currently, crashkernel parameter supports the below syntax to parse size
based on memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for crashkernel parameter, it applies to
other parameters with similar syntax. So, move this code to a more generic
place for code reuse.

Cc: Eric Biederman <ebied...@xmission.com>
Cc: Vivek Goyal <vgo...@redhat.com>
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: ke...@lists.infradead.org
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Hari, it's not immediately clear that this makes no change to the logic in
the kexec code. Can you reply with a longer change log explaining why the
old & new logic is the same for kexec.



Hi Michael,

Please consider this changelog for this patch:

--
crashkernel parameter supports different syntaxes to specify the amount
of memory to be reserved for kdump kernel. Below is one of the supported
syntaxes that needs parsing to find the memory size to reserve, based on
memory range:

crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for crashkernel parameter, it applies to
other parameters, like fadump_reserve_mem, which could use similar syntax.
So, to reuse code, moving the code that checks if the parameter syntax is as
above and also the code that parses memory size to reserve, for this syntax.
While the code is moved to kernel/params.c file, there is no change in logic
for crashkernel parameter parsing as the moved code is invoked with function
calls at appropriate places.
--

Thanks
Hari





diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 94aa10f..72f55e5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -436,6 +436,11 @@ extern char *get_options(const char *str, int 
nints, int *ints);

  extern unsigned long long memparse(const char *ptr, char **retptr);
  extern bool parse_option_str(const char *str, const char *option);
  +extern bool __init is_param_range_based(const char *cmdline);
+extern unsigned long long __init parse_mem_range_size(const char 
*param,

+  char **str,
+  unsigned long long system_ram);
+
  extern int core_kernel_text(unsigned long addr);
  extern int core_kernel_data(unsigned long addr);
  extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 56b3ed0..d43f5cc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1083,59 +1083,9 @@ static int __init parse_crashkernel_mem(char 
*cmdline,

  char *cur = cmdline, *tmp;
/* for each entry of the comma-separated list */
-do {
-unsigned long long start, end = ULLONG_MAX, size;
-
-/* get the start of the range */
-start = memparse(cur, &tmp);
-if (cur == tmp) {
-pr_warn("crashkernel: Memory value expected\n");
-return -EINVAL;
-}
-cur = tmp;
-if (*cur != '-') {
-pr_warn("crashkernel: '-' expected\n");
-return -EINVAL;
-}
-cur++;
-
-/* if no ':' is here, than we read the end */
-if (*cur != ':') {
-end = memparse(cur, &tmp);
-if (cur == tmp) {
-pr_warn("crashkernel: Memory value expected\n");
-return -EINVAL;
-}
-cur = tmp;
-if (end <= start) {
-pr_warn("crashkernel: end <= start\n");
-return -EINVAL;
-}
-}
-
-if (*cur != ':') {
-pr_warn("crashkernel: ':' expected\n");
-return -EINVAL;
-}
-cur++;
-
-size = memparse(cur, &tmp);
-if (cur == tmp) {
-pr_warn("Memory value expected\n");
-return -EINVAL;
-}
-cur = tmp;
-if (size >= system_ram) {
-pr_warn("crashkernel: invalid size\n");
-return -EINVAL;
-}
-
-/* match ? */
-if (system_ram >= start && system_ram < end) {
-*crash_size = size;
-break;
-}
-} while (*cur++ == ',');
+*crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);

+if (cur == cmdline)
+return -EINVAL;
if (*crash_size > 0) {
  while (*cur && *cur != ' ' && *cur != '@')
@@ -1272,7 +1222,6 @@ static int __init __parse_crashkernel(char 
*cmdline,

   const char *name,
   const char *suffix)
  {
-char*first_colon, *first_space;
  char*ck_cmdline;
BUG_ON(!crash_size || !crash_base);
@@ -1290,12 +1239,10 @@ static int __init __parse_crashkernel(char 
*cmdline,

  

Re: [v2,1/2] refactor code parsing size based on memory range

2016-07-05 Thread Hari Bathini



On 07/05/2016 10:48 AM, Michael Ellerman wrote:

On 06/24/2016 10:56 AM, Michael Ellerman wrote:

On Wed, 2016-22-06 at 19:25:26 UTC, Hari Bathini wrote:

...

While the code is moved to kernel/params.c file, there is no change in logic
for crashkernel parameter parsing as the moved code is invoked with function
calls at appropriate places.


Hi Michael,


Are you sure that's true?


Yes. I tested it.



The old code would return -EINVAL from parse_crashkernel_mem() for any
error, regardless of whether it had already parsed some of the string.

eg:


diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 56b3ed0..d43f5cc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1083,59 +1083,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
   
   	/* for each entry of the comma-separated list */

-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-   start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-   end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }

So eg, if I give it "128M-foo" it will modify cur, and then error out here ^


It does modify cur (local variable) but that would have no bearing on
parsing logic as we are returning immediately..


You've changed that to:


+   *crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;

Which only returns EINVAL if cur is not modified at all.


I think the confusion is with the same local variable cur in
parse_crashkernel_mem() & parse_mem_range_size() functions.

We modified cur (local variable) in parse_mem_range_size() but the
output parameter (char **str) remains unchanged unless we find a match.

Thanks
Hari


And looking below:


diff --git a/kernel/params.c b/kernel/params.c
index a6d6149..84e40ae 100644
--- a/kernel/params.c
+++ b/kernel/params.c

...

+unsigned long long __init parse_mem_range_size(const char *param,
+  char **str,
+  unsigned long long system_ram)
+{
+   char *cur = *str, *tmp;
+   unsigned long long mem_size = 0;
+
+   /* for each entry of the comma-separated list */
+   do {
+   unsigned long long start, end = ULLONG_MAX, size;
+
+   /* get the start of the range */
+   start = memparse(cur, &tmp);
+   if (cur == tmp) {
+   printk(KERN_INFO "%s: Memory value expected\n", param);
+   return mem_size;
+   }
+   cur = tmp;
+   if (*cur != '-') {
+   printk(KERN_INFO "%s: '-' expected\n", param);
+   return mem_size;
+   }
+   cur++;
+
+   /* if no ':' is here, than we read the end */
+   if (*cur != ':') {
+   end = memparse(cur, &tmp);
+   if (cur == tmp) {
+   printk(KERN_INFO "%s: Memory value expected\n",
+   param);
+   return mem_size;

If we error out here for example, we have modified cur, so the code above
*won't* return EINVAL.




Which looks like a behaviour change to me?

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RESEND][PATCH v2 1/2] kexec: refactor code parsing size based on memory range

2016-08-04 Thread Hari Bathini

Hi Dave


Thanks for the review..


On Thursday 04 August 2016 02:56 PM, Dave Young wrote:

Hi Hari,

On 08/04/16 at 01:03am, Hari Bathini wrote:

crashkernel parameter supports different syntaxes to specify the amount
of memory to be reserved for kdump kernel. Below is one of the supported
syntaxes that needs parsing to find the memory size to reserve, based on
memory range:

 crashkernel=<range1>:<size1>[,<range2>:<size2>,...]

While such parsing is implemented for crashkernel parameter, it applies
to other parameters, like fadump_reserve_mem=, which could use similar
syntax. This patch moves crashkernel's parsing code for above syntax
to kernel/params.c file for reuse. Two functions is_param_range_based()
and parse_mem_range_size() are added to kernel/params.c file for this
purpose.

Any parameter that uses the above syntax can use is_param_range_based()
function to validate the syntax and parse_mem_range_size() function to
get the parsed memory size. While some code is moved to kernel/params.c
file, there is no change functionality wise in parsing the crashkernel
parameter.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
1. Updated changelog

  include/linux/kernel.h |5 +++
  kernel/kexec_core.c|   63 +++-
  kernel/params.c|   96 
  3 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..2df7ba2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -435,6 +435,11 @@ extern char *get_options(const char *str, int nints, int 
*ints);
  extern unsigned long long memparse(const char *ptr, char **retptr);
  extern bool parse_option_str(const char *str, const char *option);
  
+extern bool __init is_param_range_based(const char *cmdline);

+extern unsigned long long __init parse_mem_range_size(const char *param,
+ char **str,
+ unsigned long long 
system_ram);
+
  extern int core_kernel_text(unsigned long addr);
  extern int core_kernel_data(unsigned long addr);
  extern int __kernel_text_address(unsigned long addr);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5616755..3a74024 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1104,59 +1104,9 @@ static int __init parse_crashkernel_mem(char *cmdline,
char *cur = cmdline, *tmp;
  
  	/* for each entry of the comma-separated list */

-   do {
-   unsigned long long start, end = ULLONG_MAX, size;
-
-   /* get the start of the range */
-   start = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (*cur != '-') {
-   pr_warn("crashkernel: '-' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   /* if no ':' is here, than we read the end */
-   if (*cur != ':') {
-   end = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("crashkernel: Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (end <= start) {
-   pr_warn("crashkernel: end <= start\n");
-   return -EINVAL;
-   }
-   }
-
-   if (*cur != ':') {
-   pr_warn("crashkernel: ':' expected\n");
-   return -EINVAL;
-   }
-   cur++;
-
-   size = memparse(cur, &tmp);
-   if (cur == tmp) {
-   pr_warn("Memory value expected\n");
-   return -EINVAL;
-   }
-   cur = tmp;
-   if (size >= system_ram) {
-   pr_warn("crashkernel: invalid size\n");
-   return -EINVAL;
-   }
-
-   /* match ? */
-   if (system_ram >= start && system_ram < end) {
-   *crash_size = size;
-   break;
-   }
-   } while (*cur++ == ',');
+   *crash_size = parse_mem_range_size("crashkernel", &cur, system_ram);
+   if (cur == cmdline)
+   return -EINVAL;
  
  	if (*crash_size > 0) {

while (*cur && *cur != ' ' && *cur != '@')
@@ -1293,7 +1243,6 @@ static int __init __parse_crashkernel(char *cmdline,
 const char *name,
 const char *suffix)
  {

Re: [RESEND][PATCH v2 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-04 Thread Hari Bathini


On Thursday 04 August 2016 03:15 PM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:
...

  /**
   * fadump_calculate_reserve_size(): reserve variable boot area 5% of System 
RAM
   *
@@ -212,12 +262,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
  {
unsigned long size;
  
+	/* sets fw_dump.reserve_bootvar */

+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
  
  	/* divide by 20 to get 5% of value */

size = memblock_end_of_DRAM() / 20;

The code already knows how to reserve 5% based on the size of the machine's
memory, as long as no commandline parameter is passed. So why can't we
just use that logic?


Hi Michael,

That is the default value reserved but not a good enough value for
every case. It is a bit difficult to come up with a robust formula
that works for every case as new kernel changes could make the
values obsolete. But it won't be all that difficult to find values that
work for different memory ranges for a given kernel version.
Passing that as range based input with "fadump_reserve_mem"
parameter would work for every memory configuration on a
given system, which is what this patch is trying to provide..

Thanks
Hari



cheers





Re: [RESEND][PATCH v2 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-08 Thread Hari Bathini



On Friday 05 August 2016 12:23 AM, Hari Bathini wrote:


On Thursday 04 August 2016 03:15 PM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:
...

  /**
   * fadump_calculate_reserve_size(): reserve variable boot area 5% 
of System RAM

   *
@@ -212,12 +262,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)

  {
  unsigned long size;
  +/* sets fw_dump.reserve_bootvar */
+parse_fadump_reserve_mem();
+
  /*
   * Check if the size is specified through fadump_reserve_mem= 
cmdline

   * option. If yes, then use that.
   */
  if (fw_dump.reserve_bootvar)
  return fw_dump.reserve_bootvar;
+else
+printk(KERN_INFO "fadump: calculating default boot size\n");
/* divide by 20 to get 5% of value */
  size = memblock_end_of_DRAM() / 20;
The code already knows how to reserve 5% based on the size of the 
machine's

memory, as long as no commandline parameter is passed. So why can't we
just use that logic?


Hi Michael,

That is the default value reserved but not a good enough value for
every case. It is a bit difficult to come up with a robust formula
that works for every case as new kernel changes could make the
values obsolete. But it won't be all that difficult to find values that
work for different memory ranges for a given kernel version.
Passing that as range based input with "fadump_reserve_mem"
parameter would work for every memory configuration on a
given system, which is what this patch is trying to provide..



Hi Michael,

You want me to add this to the changelog on respin?

Thanks
Hari


Thanks
Hari



cheers







Re: [RESEND][PATCH v2 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-08 Thread Hari Bathini



On Monday 08 August 2016 02:26 PM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:

On Friday 05 August 2016 12:23 AM, Hari Bathini wrote:

On Thursday 04 August 2016 03:15 PM, Michael Ellerman wrote:

The code already knows how to reserve 5% based on the size of the
machine's
memory, as long as no commandline parameter is passed. So why can't we
just use that logic?

That is the default value reserved but not a good enough value for
every case. It is a bit difficult to come up with a robust formula
that works for every case as new kernel changes could make the
values obsolete. But it won't be all that difficult to find values that
work for different memory ranges for a given kernel version.
Passing that as range based input with "fadump_reserve_mem"
parameter would work for every memory configuration on a
given system, which is what this patch is trying to provide..

You want me to add this to the changelog on respin?


Hi Michael,


I'm not really convinced.

Distros are going to want to specify a fixed set of values for different
memory sizes, at least that's what I've seen in the past with kdump. So
I don't see why we can't just do that in the kernel with a formula based
on memory size, and maybe some other information.


Agreed. Such support would be great but this patch is adding support
for a new syntax for an existing parameter which should still be good
to have?


Maybe the formula is more complicated than 5% of RAM, but it shouldn't
be *that* much more complicated.


Depending on what all kernel versions that need support, this can
get ugly? I could be completely wrong though..

Thanks
Hari


cheers

___
kexec mailing list
ke...@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec





[PATCH v2 1/2] fadump: reduce memory consumption for capture kernel

2017-02-07 Thread Hari Bathini
In case of fadump, capture (fadump) kernel boots like a normal kernel.
While this has its advantages, the capture kernel would initialize all
the components like normal kernel, which may not necessarily be needed
for a typical dump capture kernel. So, fadump capture kernel ends up
needing more memory than a typical (read kdump) capture kernel to boot.

This can be overcome by introducing parameters like fadump_nr_cpus=1,
similar to nr_cpus=1 parameter, applicable only when fadump is active.
But this approach needs introduction of special parameters applicable
only when fadump is active (capture kernel), for every parameter that
reduces memory/resource consumption.

A better approach would be to pass extra parameters to fadump capture
kernel. As firmware leaves the memory contents intact from the time of
crash till the new kernel is booted up, parameters to append to capture
kernel can be saved in real memory region and retrieved later when the
capture kernel is in its early boot process for appending to command
line parameters.

This patch introduces a new node /sys/kernel/fadump_cmdline_append to
specify the parameters to pass to fadump capture kernel, saves them in
real memory region and appends these parameters to capture kernel early
in its boot process.
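
The capture-kernel side of this is conceptually simple. A stripped-down
sketch of the marker checks is shown below; it reuses the names defined in
this patch (struct fadump_handover_info, the H_AREA_* markers and
FADUMP_HANDOVER_AREA_START) but is not the patch's exact implementation,
which also deals with realmode access and overlap handling:

    static char *get_fadump_parameters_realmode(void)
    {
            struct fadump_handover_info *hinfo;
            u64 *end_marker;

            /* handover area sits at a fixed realmode-accessible address */
            hinfo = (struct fadump_handover_info *)FADUMP_HANDOVER_AREA_START;

            /* trust the area only if both markers are intact */
            if (hinfo->start_marker != H_AREA_START_MARKER)
                    return NULL;

            end_marker = (u64 *)((char *)hinfo + sizeof(*hinfo));
            if (*end_marker != H_AREA_END_MARKER)
                    return NULL;

            return hinfo->params;
    }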

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
* Not changing dump format version to keep compatibility intact. Using
  start and end markers instead, to check sanity of handover area.
* Checking for memory overlap with current kernel before setting up
  a handover area.


 arch/powerpc/include/asm/fadump.h |   31 +++
 arch/powerpc/kernel/fadump.c  |  158 +
 arch/powerpc/kernel/prom.c|   22 +
 3 files changed, 210 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 0031806..e6b3dc0 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -24,6 +24,8 @@
 
 #ifdef CONFIG_FA_DUMP
 
+#include 
+
 /*
  * The RMA region will be saved for later dumping when kernel crashes.
  * RMA is Real Mode Area, the first block of logical memory address owned
@@ -126,6 +128,13 @@ struct fw_dump {
/* cmd line option during boot */
unsigned long   reserve_bootvar;
 
+   /*
+* Area to pass info to capture (fadump) kernel. For now,
+* we are only passing parameters to append.
+*/
+   unsigned long   handover_area_start;
+   unsigned long   handover_area_size;
+
unsigned long   fadumphdr_addr;
unsigned long   cpu_notes_buf;
unsigned long   cpu_notes_buf_size;
@@ -159,6 +168,27 @@ static inline u64 str_to_u64(const char *str)
 #define FADUMP_CRASH_INFO_MAGIC	STR_TO_HEX("FADMPINF")
 #define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
 
+/*
+ * Start address for the area to pass off certain configuration details
+ * like parameters to append to the commandline for a capture (fadump) kernel.
+ * Will refer to this area as handover area henceforth. Setting start address
+ * of handover area to 128MB as this area needs to be accessed in realmode.
+ */
+#define FADUMP_HANDOVER_AREA_START (1UL << 27)
+#define FADUMP_HANDOVER_AREA_SIZE  (sizeof(struct fadump_handover_info) \
++ H_END_MARKER_SIZE)
+
+#define H_AREA_START_MARKER	STR_TO_HEX("HDRSTART")
+#define H_AREA_END_MARKER  STR_TO_HEX("HOVEREND")
+#define H_END_MARKER_SIZE  8
+
+/* config info to be passed to capture kernel */
+struct fadump_handover_info {
+   u64 start_marker;
+   u64 size;
+   charparams[COMMAND_LINE_SIZE/2];
+};
+
 /* The firmware-assisted dump format.
  *
  * The register save area is an area in the partition's memory used to preserve
@@ -200,6 +230,7 @@ struct fad_crash_memory_ranges {
 
 extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
+extern char *get_fadump_parameters_realmode(void);
 extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5..eab26e9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -41,7 +41,6 @@
 #include 
 #include 
 #include 
-#include 
 
 static struct fw_dump fw_dump;
 static struct fadump_mem_struct fdm;
@@ -51,6 +50,8 @@ static DEFINE_MUTEX(fadump_mutex);
 struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
 int crash_mem_ranges;
 
+extern char _stext[], _end[];
+
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data)
@@ -74,6 +75,9 @@

[PATCH v2 2/2] fadump: update documentation about introduction of handover area

2017-02-07 Thread Hari Bathini
Update documentation about introduction of handover area that includes
configuration details like extra parameters to append to capture
kernel.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |   83 ++
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt 
b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..6c6a0e9 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -67,11 +67,17 @@ as follows:
 
 -- The freshly booted kernel will notice that there is a new
node (ibm,dump-kernel) in the device tree, indicating that
-   there is crash data available from a previous boot. During
-   the early boot OS will reserve rest of the memory above
-   boot memory size effectively booting with restricted memory
-   size. This will make sure that the second kernel will not
-   touch any of the dump memory area.
+   there is crash data available from a previous boot. This
+   second kernel, where crash data is available from previous
+   boot, is referred to as capture kernel. The capture kernel,
+   early during the boot process, looks for a handover area
+   (see fig. 2), saved by the first kernel to be handed over
+   to it. This handover area contains certain config info like
+   extra parameters to append to the capture kernel. The capture
+   kernel applies this configuration accordingly. Later, reserves
+   rest of the memory above boot memory size effectively booting
+   with restricted memory size. This will make sure that the
+   capture kernel will not touch any of the dump memory area.
 
 -- User-space tools will read /proc/vmcore to obtain the contents
of memory, which holds the previous crashed kernel dump in ELF
@@ -113,15 +119,18 @@ crash does occur.
 
   o Memory Reservation during first kernel
 
-  Low memoryTop of memory
-  0  boot memory size   |
-  |   |   |<--Reserved dump area -->|
-  V   V   |   Permanent Reservation V
-  +---+--/ /--+---++---++
-  |   |   |CPU|HPTE|  DUMP |ELF |
-  +---+--/ /--+---++---++
-|   ^
-|   |
+  Low memory  Top of memory
+  0   |
+  | Handover area |
+  |   |   |
+  |   |  boot memory size   |<-- Reserved dump area-->|
+  |   |  |  |  Permanent Reservation  |
+  V   V  VV
+  +--+-++--/ /--+---++---++
+  |  | ||   |CPU|HPTE|  DUMP |ELF |
+  +--+-++--/ /--+---++---++
+  |  ___|   ^
+   \/   |
 \   /
  ---
   Boot memory content gets transferred to
@@ -129,18 +138,21 @@ crash does occur.
   crash
Fig. 1
 
-  o Memory Reservation during second kernel after crash
-
-  Low memoryTop of memory
-  0  boot memory size   |
-  |   |<- Reserved dump area --- -->|
-  V   V V
-  +---+--/ /--+---++---++
-  |   |   |CPU|HPTE|  DUMP |ELF |
-  +---+--/ /--+---++---++
-||
-VV
-   Used by second/proc/vmcore
+  o Memory Reservation during capture (fadump) kernel after crash
+
+  Low memory  Top of memory
+  0   |
+  |  Handover area|
+  |   |   |
+  |   |boot memory size   |
+  |   | |<- Reserved dump area --- -->|
+  V   V V V
+  +--+-++--/ /--+---++---++
+  |  | || 

Re: [PATCH v1 1/2] fadump: reduce memory consumption for capture kernel

2017-02-07 Thread Hari Bathini

Hi Mahesh,


On Tuesday 31 January 2017 01:05 AM, Mahesh Jagannath Salgaonkar wrote:

On 01/30/2017 10:14 PM, Hari Bathini wrote:

In case of fadump, capture (fadump) kernel boots like a normal kernel.
While this has its advantages, the capture kernel would initialize all
the components like normal kernel, which may not necessarily be needed
for a typical dump capture kernel. So, fadump capture kernel ends up
needing more memory than a typical (read kdump) capture kernel to boot.

...

+#define FADUMP_FORMAT_VERSION  0x0002

Why 0x0002 ? Does Phyp now support new version of dump format ? We
should be more careful not to break backward compatibility.


Dump format version has not changed in Phyp. Undone the change in v2
to keep backward compatibility intact.


+static ssize_t fadump_params_show(struct kobject *kobj,
+  struct kobj_attribute *attr,
+  char *buf)
+{
+   return sprintf(buf, "%s\n",
+   get_fadump_params_buf(__va(fw_dump.handover_area_start)));

May be we should show current cmdline + fadump append params.


I think it is better to display only the append parameters, as the current
cmdline parameters may not always be accurate?

Thanks
Hari



Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-24 Thread Hari Bathini



On Friday 20 January 2017 11:17 AM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE.
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


  arch/ia64/kernel/crash.c   |   22 --
  include/linux/crash_core.h |4 
  include/linux/elf.h|2 ++
  kernel/crash_core.c|   34 ++
  kernel/kexec_core.c|   28 
  5 files changed, 20 insertions(+), 70 deletions(-)

Do the powerpc patches later in the series actually depend on this one?
Or is this just an unrelated cleanup?

As it is I can't merge the series until we at least get an ack on this
from the ia64 folks.

If you can just split this out as a separate patch that would make it a
lot easier to get the rest merged.



Hi Michael,

append_elf_note() & final_note() functions were defined statically at three
different places, arch/powerpc/kernel/fadump.c being one of them. With my
changes, I would need to add a fourth static definition if I ignore this
cleanup. So, I preferred to clean this up...


Let me ping IA64 folks one last time. Will do a respin without the
cleanup if I don't get any response from them by end of this week..

Thanks
Hari



Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-24 Thread Hari Bathini



On Tuesday 17 January 2017 10:36 PM, Hari Bathini wrote:



On Friday 06 January 2017 07:33 AM, Dave Young wrote:

On 01/05/17 at 11:01pm, Hari Bathini wrote:

Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE.
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


  arch/ia64/kernel/crash.c   |   22 --
  include/linux/crash_core.h |4 
  include/linux/elf.h|2 ++
  kernel/crash_core.c|   34 ++
  kernel/kexec_core.c|   28 
  5 files changed, 20 insertions(+), 70 deletions(-)

diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 2955f35..75859a0 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -27,28 +27,6 @@ static int kdump_freeze_monarch;
  static int kdump_on_init = 1;
  static int kdump_on_fatal_mca = 1;
  -static inline Elf64_Word
-*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void 
*data,

-size_t data_len)
-{
-struct elf_note *note = (struct elf_note *)buf;
-note->n_namesz = strlen(name) + 1;
-note->n_descsz = data_len;
-note->n_type   = type;
-buf += (sizeof(*note) + 3)/4;
-memcpy(buf, name, note->n_namesz);
-buf += (note->n_namesz + 3)/4;
-memcpy(buf, data, data_len);
-buf += (data_len + 3)/4;
-return buf;
-}
-
-static void
-final_note(void *buf)
-{
-memset(buf, 0, sizeof(struct elf_note));
-}
-
  extern void ia64_dump_cpu_regs(void *);
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 18d0f94..541a197 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -55,6 +55,10 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
  extern size_t vmcoreinfo_size;
  extern size_t vmcoreinfo_max_size;
  +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int 
type,

+  void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+
  int __init parse_crashkernel(char *cmdline, unsigned long long 
system_ram,
  unsigned long long *crash_size, unsigned long long 
*crash_base);
  int parse_crashkernel_high(char *cmdline, unsigned long long 
system_ram,

diff --git a/include/linux/elf.h b/include/linux/elf.h
index 20fa8d8..ba069e8 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC [];
  #define elf_noteelf32_note
  #define elf_addr_tElf32_Off
  #define Elf_HalfElf32_Half
+#define Elf_WordElf32_Word
#else
  @@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC [];
  #define elf_noteelf64_note
  #define elf_addr_tElf64_Off
  #define Elf_HalfElf64_Half
+#define Elf_WordElf64_Word
#endif
  diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 80b441d..362dace 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -291,32 +291,26 @@ int __init parse_crashkernel_low(char *cmdline,
  "crashkernel=", suffix_tbl[SUFFIX_LOW]);
  }
  -static u32 *append_elf_note(u32 *buf, char *name, unsigned int type,
-void *data, size_t data_len)
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int 
type,

+  void *data, size_t data_len)
  {
-struct elf_note note;
-
-note.n_namesz = strlen(name) + 1;
-note.n_descsz = data_len;
-note.n_type   = type;
-memcpy(buf, &note, sizeof(note));
-buf += (sizeof(note) + 3)/4;
-memcpy(buf, name, note.n_namesz);
-buf += (note.n_namesz + 3)/4;
-memcpy(buf, data, note.n_descsz);
-buf += (note.n_descsz + 3)/4;
+struct elf_note *note = (struct elf_note *)buf;
+
+note->n_namesz = strlen(name) + 1;
+note->n_descsz = data_len;
+note->n_type   = type;
+buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
+memcpy(buf, name, note->n_namesz);
+buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
+memcpy(buf, data, data_len);
+buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
return buf;
  }
  -static void final_note(u32 *buf)
+void final_note(Elf_Word *buf)
  {
-struct elf_note note;
-
-note.n_namesz = 0;
-note.n_descsz = 0;
-note.n_type   = 0;
-memcpy(buf, &note, sizeof(note));
+memset(buf, 0, sizeof(struct elf_note));
  }
static void update_vmcoreinfo_note(void)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 2179a16..263d764 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -990,34 +990,6 @@ int crash_shrink_memory(unsigned long new_size)
  return ret;

[PATCH v1 1/2] fadump: reduce memory consumption for capture kernel

2017-01-30 Thread Hari Bathini
In case of fadump, capture (fadump) kernel boots like a normal kernel.
While this has its advantages, the capture kernel would initialize all
the components like normal kernel, which may not necessarily be needed
for a typical dump capture kernel. So, fadump capture kernel ends up
needing more memory than a typical (read kdump) capture kernel to boot.

This can be overcome by introducing parameters like fadump_nr_cpus=1,
similar to nr_cpus=1 parameter, applicable only when fadump is active.
But this approach needs introduction of special parameters applicable
only when fadump is active (capture kernel), for every parameter that
reduces memory/resource consumption.

A better approach would be to pass extra parameters to fadump capture
kernel. As firmware leaves the memory contents intact from the time of
crash till the new kernel is booted up, parameters to append to capture
kernel can be saved in real memory region and retrieved later when the
capture kernel is in its early boot process for appending to command
line parameters.

This patch introduces a new node /sys/kernel/fadump_cmdline_append to
specify the parameters to pass to fadump capture kernel, saves them in
real memory region and appends these parameters to capture kernel early
in its boot process.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |   28 
 arch/powerpc/kernel/fadump.c  |  125 -
 arch/powerpc/kernel/prom.c|   19 ++
 3 files changed, 170 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 0031806..484083a 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -24,6 +24,8 @@
 
 #ifdef CONFIG_FA_DUMP
 
+#include 
+
 /*
  * The RMA region will be saved for later dumping when kernel crashes.
  * RMA is Real Mode Area, the first block of logical memory address owned
@@ -45,6 +47,8 @@
 
 #define memblock_num_regions(memblock_type)(memblock.memblock_type.cnt)
 
+#define FADUMP_FORMAT_VERSION  0x0002
+
 /* Firmware provided dump sections */
 #define FADUMP_CPU_STATE_DATA  0x0001
 #define FADUMP_HPTE_REGION 0x0002
@@ -126,6 +130,13 @@ struct fw_dump {
/* cmd line option during boot */
unsigned long   reserve_bootvar;
 
+   /*
+* Area to pass info to capture (fadump) kernel. For now,
+* we are only passing parameters to append.
+*/
+   unsigned long   handover_area_start;
+   unsigned long   handover_area_size;
+
unsigned long   fadumphdr_addr;
unsigned long   cpu_notes_buf;
unsigned long   cpu_notes_buf_size;
@@ -159,6 +170,22 @@ static inline u64 str_to_u64(const char *str)
 #define FADUMP_CRASH_INFO_MAGIC	STR_TO_HEX("FADMPINF")
 #define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
 
+/*
+ * The start address for an area to pass off certain configuration details
+ * like parameters to append to the commandline for a capture (fadump) kernel.
+ * Setting it to 128MB as this needs to be accessed in realmode.
+ */
+#define FADUMP_HANDOVER_AREA_START (1UL << 27)
+
+#define FADUMP_PARAMS_AREA_MARKER  STR_TO_HEX("FADMPCMD")
+#define FADUMP_PARAMS_INFO_SIZE	sizeof(struct fadump_params_info)
+
+/* fadump parameters info */
+struct fadump_params_info {
+   u64 params_area_marker;
+   charparams[COMMAND_LINE_SIZE/2];
+};
+
 /* The firmware-assisted dump format.
  *
  * The register save area is an area in the partition's memory used to preserve
@@ -200,6 +227,7 @@ struct fad_crash_memory_ranges {
 
 extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
+extern char *get_fadump_parameters_realmode(void);
 extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5..bc82d22 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -41,7 +41,6 @@
 #include 
 #include 
 #include 
-#include 
 
 static struct fw_dump fw_dump;
 static struct fadump_mem_struct fdm;
@@ -74,6 +73,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
fw_dump.fadump_supported = 1;
fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
 
+   fw_dump.handover_area_start = FADUMP_HANDOVER_AREA_START;
+   fw_dump.handover_area_size = PAGE_ALIGN(FADUMP_PARAMS_INFO_SIZE);
+
/*
 * The 'ibm,kernel-dump' rtas node is present only if there is
 * dump data waiting for us.
@@ -147,7 +149,7 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
memset(fdm, 0, sizeof(struct fadump_mem_struct));
addr = addr & PAGE_MASK;
 
-   fdm->header.dump_form

[PATCH v1 2/2] fadump: update documentation about introduction of handover area

2017-01-30 Thread Hari Bathini
Update documentation about introduction of handover area that includes
configuration details like extra parameters to append to capture
kernel.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |   83 ++
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt 
b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..2da3a3f 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -67,11 +67,17 @@ as follows:
 
 -- The freshly booted kernel will notice that there is a new
node (ibm,dump-kernel) in the device tree, indicating that
-   there is crash data available from a previous boot. During
-   the early boot OS will reserve rest of the memory above
-   boot memory size effectively booting with restricted memory
-   size. This will make sure that the second kernel will not
-   touch any of the dump memory area.
+   there is crash data available from a previous boot. This
+   second kernel, where crash data is available from previous
+   boot, is referred to as capture kernel. The capture kernel,
+   early during the boot process, looks for a handover area
+   (see fig. 2), saved by the first kernel to be handed over
+   to it. This handover area contains certain config info like
+   extra parameters to append to the capture kernel. The capture
+   kernel applies this configuration accordingly. Later, reserves
+   rest of the memory above boot memory size effectively booting
+   with restricted memory size. This will make sure that the
+   capture kernel will not touch any of the dump memory area.
 
 -- User-space tools will read /proc/vmcore to obtain the contents
of memory, which holds the previous crashed kernel dump in ELF
@@ -113,15 +119,18 @@ crash does occur.
 
   o Memory Reservation during first kernel
 
-  Low memoryTop of memory
-  0  boot memory size   |
-  |   |   |<--Reserved dump area -->|
-  V   V   |   Permanent Reservation V
-  +---+--/ /--+---++---++
-  |   |   |CPU|HPTE|  DUMP |ELF |
-  +---+--/ /--+---++---++
-|   ^
-|   |
+  Low memory  Top of memory
+  0   |
+  | Handover area |
+  |   |   |
+  |   |  boot memory size   |<-- Reserved dump area-->|
+  |   |  |  |  Permanent Reservation  |
+  V   V  VV 
+  +--+-++--/ /--+---++---++
+  |  | ||   |CPU|HPTE|  DUMP |ELF |
+  +--+-++--/ /--+---++---++
+  |  ___|   ^
+   \/   |
 \   /
  ---
   Boot memory content gets transferred to
@@ -129,18 +138,21 @@ crash does occur.
   crash
Fig. 1
 
-  o Memory Reservation during second kernel after crash
-
-  Low memoryTop of memory
-  0  boot memory size   |
-  |   |<- Reserved dump area --- -->|
-  V   V V
-  +---+--/ /--+---++---++
-  |   |   |CPU|HPTE|  DUMP |ELF |
-  +---+--/ /--+---++---++
-||
-VV
-   Used by second/proc/vmcore
+  o Memory Reservation during capture (fadump) kernel after crash
+
+  Low memory  Top of memory
+  0   |
+  |  Handover area|
+  |   |   |
+  |   |boot memory size   |
+  |   | |<- Reserved dump area --- -->|
+  V   V V V
+  +--+-++--/ /--+---++---++
+  |  | || 

Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-25 Thread Hari Bathini



On Tuesday 24 January 2017 11:53 PM, Tony Luck wrote:

On Tue, Jan 24, 2017 at 10:11 AM, Hari Bathini
<hbath...@linux.vnet.ibm.com> wrote:


Hello IA64 folks,

Could you please review this patch..?

It looks OK in principal.  My lab is in partial disarray at the
moment (just got back from a sabbatical) so I can't test
build and boot. Have you cross-compiled it (or gotten a success
build report from zero-day)?


I haven't gotten a success/failure build report from zero-day. Not sure 
what to make of it.

But I did try cross-compiling and it was successful. Should that do?

Thanks
Hari



If you have ... then add an Acked-by: Tony Luck <tony.l...@intel.com>

-Tony





[PATCH v2] powerpc/fadump: set an upper limit for boot memory size

2017-02-24 Thread Hari Bathini
By default, 5% of system RAM is reserved for preserving boot memory.
Alternatively, a user can specify the amount of memory to reserve.
See Documentation/powerpc/firmware-assisted-dump.txt for details. In
addition to the memory reserved for preserving boot memory, some more
memory is reserved, to save HPTE region, CPU state data and ELF core
headers.

Memory Reservation during first kernel looks like below:

  Low memoryTop of memory
  0  boot memory size   |
  |   |   |<--Reserved dump area -->|
  V   V   |   Permanent Reservation V
  +---+--/ /--+---++---++
  |   |   |CPU|HPTE|  DUMP |ELF |
  +---+--/ /--+---++---++
|   ^
|   |
\   /
 ---
  Boot memory content gets transferred to
  reserved area by firmware at the time of
  crash

This implicitly means that the sum of the sizes of boot memory, CPU
state data, HPTE region, DUMP preserving area and ELF core headers
can't be greater than the total memory size. But currently, a user is
allowed to specify any value as boot memory size. So, the above rule
is violated when a boot memory size around 50% of the total available
memory is specified. As the kernel is not handling this currently, it
may lead to undefined behavior. Fix it by setting an upper limit for
boot memory size to 25% of the total available memory. Also, instead
of using memblock_end_of_DRAM(), which doesn't take the holes, if any,
in the memory layout into account, use memblock_phys_mem_size() to
calculate the percentage of total available memory.
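
To make the limit concrete, on a hypothetical system with 64 GB of usable
memory the adjustment works out as follows (illustrative numbers only,
reusing the MAX_BOOT_MEM_RATIO definition from this patch):

    /* memblock_phys_mem_size() == 64 GB, MAX_BOOT_MEM_RATIO == 4      */
    max_size = (64UL << 30) / MAX_BOOT_MEM_RATIO;   /* 16 GB           */
    /* a user-specified boot memory size of 32 GB is clamped to 16 GB, */
    /* while anything at or below 16 GB is honoured as given           */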

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

This patch is based on top of the patchset to reuse-crashkernel-parameter-
for-fadump (http://patchwork.ozlabs.org/patch/711522).

Changes from v1:
* Using memblock_phys_mem_size() instead of memblock_end_of_DRAM() to
  get system RAM size.


 arch/powerpc/include/asm/fadump.h |3 +++
 arch/powerpc/kernel/fadump.c  |   16 +++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 60b9108..a3de219 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -43,6 +43,9 @@
 #define MIN_BOOT_MEM   (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
 
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
 #define memblock_num_regions(memblock_type)(memblock.memblock_type.cnt)
 
 /* Firmware provided dump sections */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index e013f8f..21d5404 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -221,12 +221,26 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
if (ret == 0 && size > 0) {
+   unsigned long max_size;
+
fw_dump.reserve_bootvar = (unsigned long)size;
+
+   /*
+* Adjust if the boot memory size specified is above
+* the upper limit.
+*/
+   max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+   if (fw_dump.reserve_bootvar > max_size) {
+   fw_dump.reserve_bootvar = max_size;
+   pr_info("Adjusted boot memory size to %luMB\n",
+   (fw_dump.reserve_bootvar >> 20));
+   }
+
return fw_dump.reserve_bootvar;
}
 
/* divide by 20 to get 5% of value */
-   size = memblock_end_of_DRAM() / 20;
+   size = memblock_phys_mem_size() / 20;
 
/* round it down in multiples of 256 */
size = size & ~0x0FFFUL;



Re: [PATCH] powerpc/fadump: set an upper limit for boot memory size

2017-02-21 Thread Hari Bathini

Hi Michael,


On Friday 17 February 2017 11:54 AM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index de7d39a..d5107f4 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -222,6 +222,18 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
if (ret == 0 && size > 0) {
fw_dump.reserve_bootvar = (unsigned long)size;
+   /*
+* Adjust if the boot memory size specified is above
+* the upper limit.
+*/
+   if (fw_dump.reserve_bootvar >
+   (memblock_end_of_DRAM() / MAX_BOOT_MEM_RATIO)) {

Using memblock_end_of_DRAM() doesn't take into account the fact that you
might have holes in your memory layout.

Possibly on PowerVM that never happens, but I don't think we should
write the code to assume that, if possible.



I think memblock_phys_mem_size() can fill in here..

In the same file, memblock_end_of_DRAM() is also used when nothing
is specified through cmdline. Let me also change that and respin..

Thanks
Hari



[PATCH] powerpc/fadump: set an upper limit for boot memory size

2017-02-15 Thread Hari Bathini
By default, 5% of system RAM is reserved for preserving boot memory.
Alternatively, a user can specify the amount of memory to reserve.
See Documentation/powerpc/firmware-assisted-dump.txt for details. In
addition to the memory reserved for preserving boot memory, some more
memory is reserved, to save HPTE region, CPU state data and ELF core
headers.

Memory Reservation during first kernel looks like below:

  Low memoryTop of memory
  0  boot memory size   |
  |   |   |<--Reserved dump area -->|
  V   V   |   Permanent Reservation V
  +---+--/ /--+---++---++
  |   |   |CPU|HPTE|  DUMP |ELF |
  +---+--/ /--+---++---++
|   ^
|   |
\   /
 ---
  Boot memory content gets transferred to
  reserved area by firmware at the time of
  crash

The implicit rule here is that the sum of the sizes of boot memory,
CPU state data, HPTE region and ELF core headers can't be greater than
the total memory size. But currently, a user is allowed to specify any
value as the boot memory size. So, the above rule is violated when a boot
memory size close to 50% of the total available memory is specified.
As the kernel does not handle this case currently, it may lead to undefined
behavior. Fix it by setting an upper limit for the boot memory size of 25%
of the total available memory.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

This patch is based on top of reuse-crashkernel-parameter-for-fadump patchset
(https://lists.ozlabs.org/pipermail/linuxppc-dev/2017-January/152724.html)


 arch/powerpc/include/asm/fadump.h |3 +++
 arch/powerpc/kernel/fadump.c  |   12 
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 60b9108..a3de219 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -43,6 +43,9 @@
 #define MIN_BOOT_MEM   (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
 
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
 #define memblock_num_regions(memblock_type)(memblock.memblock_type.cnt)
 
 /* Firmware provided dump sections */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index de7d39a..d5107f4 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -222,6 +222,18 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
				&size, &base);
if (ret == 0 && size > 0) {
fw_dump.reserve_bootvar = (unsigned long)size;
+   /*
+* Adjust if the boot memory size specified is above
+* the upper limit.
+*/
+   if (fw_dump.reserve_bootvar >
+   (memblock_end_of_DRAM() / MAX_BOOT_MEM_RATIO)) {
+   fw_dump.reserve_bootvar = (memblock_end_of_DRAM() /
+  MAX_BOOT_MEM_RATIO);
+   pr_info("Adjusted boot memory size to %luMB\n",
+   (fw_dump.reserve_bootvar >> 20));
+   }
+
return fw_dump.reserve_bootvar;
}
 



Re: [PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-01-17 Thread Hari Bathini



On Friday 06 January 2017 07:33 AM, Dave Young wrote:

On 01/05/17 at 11:01pm, Hari Bathini wrote:

Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE.
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.
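
As a side note (not from the patch): Elf32_Word and Elf64_Word are both
32-bit, so DIV_ROUND_UP(x, sizeof(Elf_Word)) yields the same word count as
the old open-coded (x + 3)/4. A minimal user-space sketch of that
equivalence, assuming a 4-byte Elf_Word:

#include <stdio.h>
#include <string.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

typedef unsigned int Elf_Word;	/* Elf32_Word and Elf64_Word are both 32-bit */

int main(void)
{
	const char *name = "CORE";		/* e.g. the NT_PRSTATUS note name */
	size_t namesz = strlen(name) + 1;

	/* Both expressions give the note-name length in 4-byte words, rounded up. */
	printf("old: %zu words, new: %zu words\n",
	       (namesz + 3) / 4,
	       DIV_ROUND_UP(namesz, sizeof(Elf_Word)));
	return 0;
}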

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


  arch/ia64/kernel/crash.c   |   22 --
  include/linux/crash_core.h |4 
  include/linux/elf.h|2 ++
  kernel/crash_core.c|   34 ++
  kernel/kexec_core.c|   28 
  5 files changed, 20 insertions(+), 70 deletions(-)

diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 2955f35..75859a0 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -27,28 +27,6 @@ static int kdump_freeze_monarch;
  static int kdump_on_init = 1;
  static int kdump_on_fatal_mca = 1;
  
-static inline Elf64_Word

-*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
-   size_t data_len)
-{
-   struct elf_note *note = (struct elf_note *)buf;
-   note->n_namesz = strlen(name) + 1;
-   note->n_descsz = data_len;
-   note->n_type   = type;
-   buf += (sizeof(*note) + 3)/4;
-   memcpy(buf, name, note->n_namesz);
-   buf += (note->n_namesz + 3)/4;
-   memcpy(buf, data, data_len);
-   buf += (data_len + 3)/4;
-   return buf;
-}
-
-static void
-final_note(void *buf)
-{
-   memset(buf, 0, sizeof(struct elf_note));
-}
-
  extern void ia64_dump_cpu_regs(void *);
  
  static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 18d0f94..541a197 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -55,6 +55,10 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
  extern size_t vmcoreinfo_size;
  extern size_t vmcoreinfo_max_size;
  
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,

+ void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+
  int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
  int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 20fa8d8..ba069e8 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC [];
  #define elf_note  elf32_note
  #define elf_addr_tElf32_Off
  #define Elf_Half  Elf32_Half
+#define Elf_Word   Elf32_Word
  
  #else
  
@@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC [];

  #define elf_note  elf64_note
  #define elf_addr_tElf64_Off
  #define Elf_Half  Elf64_Half
+#define Elf_Word   Elf64_Word
  
  #endif
  
diff --git a/kernel/crash_core.c b/kernel/crash_core.c

index 80b441d..362dace 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -291,32 +291,26 @@ int __init parse_crashkernel_low(char *cmdline,
"crashkernel=", suffix_tbl[SUFFIX_LOW]);
  }
  
-static u32 *append_elf_note(u32 *buf, char *name, unsigned int type,

-   void *data, size_t data_len)
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+ void *data, size_t data_len)
  {
-   struct elf_note note;
-
-   note.n_namesz = strlen(name) + 1;
-   note.n_descsz = data_len;
-   note.n_type   = type;
-   memcpy(buf, &note, sizeof(note));
-   buf += (sizeof(note) + 3)/4;
-   memcpy(buf, name, note.n_namesz);
-   buf += (note.n_namesz + 3)/4;
-   memcpy(buf, data, note.n_descsz);
-   buf += (note.n_descsz + 3)/4;
+   struct elf_note *note = (struct elf_note *)buf;
+
+   note->n_namesz = strlen(name) + 1;
+   note->n_descsz = data_len;
+   note->n_type   = type;
+   buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
+   memcpy(buf, name, note->n_namesz);
+   buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
+   memcpy(buf, data, data_len);
+   buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
  
  	return buf;

  }
  
-static void final_note(u32 *buf)

+void final_note(Elf_Word *buf)
  {
-   struct elf_note note;
-
-   note.n_namesz = 0;
-   note.n_descsz = 0;
-   note.n_type   = 0;
-   memcpy(buf, &note, sizeof(note));
+   memset(buf, 0, sizeof(struct elf_note));
  }
  
  static void update_vmcoreinfo_note(void)

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 2179a16..263d764 100644
-

Re: [PATCH v2] powerpc/mm: export current mmu mode info

2016-09-26 Thread Hari Bathini

Hi Michael/Aneesh,

Thanks for reviewing the patch..


On Friday 23 September 2016 04:40 PM, Michael Ellerman wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408..558987c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -199,6 +199,21 @@ static inline void mmu_clear_feature(unsigned long feature)
  
  extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
  
+/*

+ * Possible MMU modes
+ */
+#define MMU_MODE_NONE   0
+#define MMU_MODE_RADIX  1
+#define MMU_MODE_HASH   2
+#define MMU_MODE_HASH32 3
+#define MMU_MODE_NOHASH 4
+#define MMU_MODE_NOHASH32   5

These are already defined in the same file:

/*
  * MMU families
  */
#define MMU_FTR_HPTE_TABLE  ASM_CONST(0x0001)
#define MMU_FTR_TYPE_8xxASM_CONST(0x0002)
#define MMU_FTR_TYPE_40xASM_CONST(0x0004)
#define MMU_FTR_TYPE_44xASM_CONST(0x0008)
#define MMU_FTR_TYPE_FSL_E  ASM_CONST(0x0010)
#define MMU_FTR_TYPE_47xASM_CONST(0x0020)
#define MMU_FTR_TYPE_RADIX  ASM_CONST(0x0040)

And the values for the current CPU are in cur_cpu_spec->mmu_features.


I primarily tried to introduce this patch because the crash tool doesn't have
access to offset info (which is needed to access the structure member
mmu_features) early in its initialization process.


So if you must export anything, make it that value, and hopefully the
rest of the patch goes away.


On second thought, as long as we can get the vmemmap start address, for
which we already have a variable, we can defer finding the MMU type. I may
need no kernel patch in that case. Working on patches for the crash &
makedumpfile tools accordingly. Will post a v3 only if that doesn't work out..

Thanks
Hari



Re: [PATCH v2] powerpc/mm: export current mmu mode info

2016-09-23 Thread Hari Bathini



On Friday 23 September 2016 10:14 AM, Aneesh Kumar K.V wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


Hi Aneesh,


On Thursday 22 September 2016 09:54 PM, Aneesh Kumar K.V wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know the current MMU mode the kernel is using,
to debug/analyze it.  The current MMU mode depends on hardware support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know the MMU mode
early in their init process, when they may not have access to offset info
of structure members. A hard-coded offset may help but it won't be robust.

IIUC, you walk the linux page table and that should be more or less same

Taking the case of crash tool, vmemmap start value is currently
hard-coded to 0xf000000000000000UL but it changed to
0xc00a000000000000UL in case of radix.

All of that is already defined as variables in the kernel. You can look at
radix__early_init_mmu().



between radix/hash right except few bits. Now what crash will be
interested in will be the RPN part of the table which should be same
between hash/radix.

Though the walk is pretty much the same, the tool still needs to know
the right index values and vmemmap start to use, as they are different
for radix and hash..


This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process,

Init process of what ? kernel or crash tool ?

tool initialization - crash or makedumpfile..


helping tools to initialize accurately for each MMU mode.
This patch also optimizes the radix_enabled() function call.


how do you differentiate between the old linux page table format and
the new ? Can you also summarize what crash tool look for in the page
table ?

It needs the index sizes, masked bit values and page flag info to
do the page table walk. Since they can be different for hash and
radix..


Can you look at radix__early_init_mmu/hash__early_init_mmu and see you
can work with the variables defined there ?


Did consider that but didn't opt for it for a few reasons:

1. Will still need to know the MMU mode as huge page address translation
   is not the same for radix & hash.

2. Will have to get all these values from a crashed kernel when I can set
   them based on MMU mode. Less dependence on the failed kernel, the better..

3. Stash more variables in vmcoreinfo (for makedumpfile) when one is
   sufficient to serve the purpose.

Thanks
Hari



[PATCH] ppc64/book3s: export mmu type info

2016-09-22 Thread Hari Bathini
The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know, the current MMU mode the kernel is using
to debug/analyze the kernel. The current MMU mode depends on H/W support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know, the MMU mode
early in their init process when they have no access to offset info of
structure members. A hard-coded offset may help but it won't be robust.

This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process, helping tools to initialize accurately for each MMU mode.
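
For context (an illustration, not part of the patch): VMCOREINFO_SYMBOL()
emits a line of the form SYMBOL(name)=<hex address> into the vmcoreinfo
note, which is how a tool like crash or makedumpfile could locate
current_mmu_mode before it knows any structure offsets. A hedged sketch of
parsing such a line (the address below is made up):

#include <stdio.h>

/*
 * Minimal user-space sketch: parse a vmcoreinfo line such as
 *   SYMBOL(current_mmu_mode)=c000000001a2b3c4
 * The address is made up; the line format follows VMCOREINFO_SYMBOL().
 */
int main(void)
{
	const char *line = "SYMBOL(current_mmu_mode)=c000000001a2b3c4";
	unsigned long long addr;

	if (sscanf(line, "SYMBOL(current_mmu_mode)=%llx", &addr) == 1)
		printf("current_mmu_mode is at 0x%llx\n", addr);
	return 0;
}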

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h |5 +
 arch/powerpc/include/asm/book3s/64/pgtable.h |6 ++
 arch/powerpc/kernel/machine_kexec.c  |3 +++
 arch/powerpc/mm/hash_utils_64.c  |2 ++
 arch/powerpc/mm/pgtable-radix.c  |2 ++
 arch/powerpc/mm/pgtable_64.c |6 ++
 6 files changed, 24 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
b/arch/powerpc/include/asm/book3s/64/mmu.h
index 8afb0e0..af68df3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -30,6 +30,11 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 #ifndef __ASSEMBLY__
 /*
+ * current MMU mode
+ */
+extern unsigned int current_mmu_mode;
+
+/*
  * ISA 3.0 partiton and process table entry format
  */
 struct prtb_entry {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 263bf39..f7faebd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -2,6 +2,12 @@
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 
 /*
+ * Possible MMU modes
+ */
+#define HASH_MMU_MODE  0
+#define RADIX_MMU_MODE 1
+
+/*
  * Common bits between hash and Radix page table
  */
 #define _PAGE_BIT_SWAP_TYPE0
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index 2694d07..4ecc184 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -77,6 +77,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef CONFIG_PPC_BOOK3S
+   VMCOREINFO_SYMBOL(current_mmu_mode);
+#endif
VMCOREINFO_SYMBOL(vmemmap_list);
VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
VMCOREINFO_SYMBOL(mmu_psize_defs);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556..3c7855a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -886,6 +886,8 @@ void __init hash__early_init_devtree(void)
 
 void __init hash__early_init_mmu(void)
 {
+   current_mmu_mode = HASH_MMU_MODE;
+
htab_init_page_sizes();
 
/*
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index af897d9..98fbc97 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -298,6 +298,8 @@ void __init radix__early_init_mmu(void)
 {
unsigned long lpcr;
 
+   current_mmu_mode = RADIX_MMU_MODE;
+
 #ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4e..04319ac 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -63,6 +63,12 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
+ * current MMU mode
+ */
+unsigned int current_mmu_mode;
+EXPORT_SYMBOL(current_mmu_mode);
+
+/*
  * partition table and process table for ISA 3.0
  */
 struct prtb_entry *process_tb;



Re: [PATCH v2] powerpc/mm: export current mmu mode info

2016-09-22 Thread Hari Bathini



On Thursday 22 September 2016 09:32 PM, Hari Bathini wrote:

The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know the current MMU mode the kernel is using,
to debug/analyze it.  The current MMU mode depends on hardware support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know the MMU mode
early in their init process, when they may not have access to offset info
of structure members. A hard-coded offset may help but it won't be robust.

This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process, helping tools to initialize accurately for each MMU mode.
This patch also optimizes the radix_enabled() function call.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
* Patch name changed from "ppc64/book3s: export mmu type info"
* Optimized radix_enabled() function


  arch/powerpc/include/asm/mmu.h  |   22 +-
  arch/powerpc/kernel/machine_kexec.c |3 +++
  arch/powerpc/mm/hash_utils_64.c |2 ++
  arch/powerpc/mm/pgtable-radix.c |2 ++
  arch/powerpc/mm/pgtable.c   |6 ++
  arch/powerpc/mm/tlb_hash32.c|1 +
  arch/powerpc/mm/tlb_nohash.c|2 ++
  7 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408..558987c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -199,6 +199,21 @@ static inline void mmu_clear_feature(unsigned long feature)

  extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;

+/*
+ * Possible MMU modes
+ */
+#define MMU_MODE_NONE   0
+#define MMU_MODE_RADIX  1
+#define MMU_MODE_HASH   2
+#define MMU_MODE_HASH32 3
+#define MMU_MODE_NOHASH 4
+#define MMU_MODE_NOHASH32   5
+
+/*
+ * current MMU mode
+ */
+extern unsigned int current_mmu_mode __read_mostly;
+
  #ifdef CONFIG_PPC64
  /* This is our real memory area size on ppc64 server, on embedded, we
   * make it match the size our of bolted TLB area
@@ -218,7 +233,12 @@ static inline void assert_pte_locked(struct mm_struct *mm, 
unsigned long addr)
  #ifdef CONFIG_PPC_RADIX_MMU
  static inline bool radix_enabled(void)
  {
-   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+   if (current_mmu_mode == MMU_MODE_RADIX)
+   return true;
+   else if (current_mmu_mode != MMU_MODE_NONE)
+   return false;
+   else
+   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
  }

  static inline bool early_radix_enabled(void)
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index 2694d07..4ecc184 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -77,6 +77,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(contig_page_data);
  #endif
  #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef CONFIG_PPC_BOOK3S
+   VMCOREINFO_SYMBOL(current_mmu_mode);


Oops! This doesn't have to be under any flag. Let me resend.

Thanks
Hari



[PATCH v2] powerpc/mm: export current mmu mode info

2016-09-22 Thread Hari Bathini
The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know the current MMU mode the kernel is using,
to debug/analyze it.  The current MMU mode depends on hardware support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know the MMU mode
early in their init process, when they may not have access to offset info
of structure members. A hard-coded offset may help but it won't be robust.

This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process, helping tools to initialize accurately for each MMU mode.
This patch also optimizes the radix_enabled() function call.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
* Patch name changed from "ppc64/book3s: export mmu type info"
* Optimized radix_enabled() function


 arch/powerpc/include/asm/mmu.h  |   22 +-
 arch/powerpc/kernel/machine_kexec.c |3 +++
 arch/powerpc/mm/hash_utils_64.c |2 ++
 arch/powerpc/mm/pgtable-radix.c |2 ++
 arch/powerpc/mm/pgtable.c   |6 ++
 arch/powerpc/mm/tlb_hash32.c|1 +
 arch/powerpc/mm/tlb_nohash.c|2 ++
 7 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408..558987c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -199,6 +199,21 @@ static inline void mmu_clear_feature(unsigned long feature)
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
+/*
+ * Possible MMU modes
+ */
+#define MMU_MODE_NONE   0
+#define MMU_MODE_RADIX  1
+#define MMU_MODE_HASH   2
+#define MMU_MODE_HASH32 3
+#define MMU_MODE_NOHASH 4
+#define MMU_MODE_NOHASH32   5
+
+/*
+ * current MMU mode
+ */
+extern unsigned int current_mmu_mode __read_mostly;
+
 #ifdef CONFIG_PPC64
 /* This is our real memory area size on ppc64 server, on embedded, we
  * make it match the size our of bolted TLB area
@@ -218,7 +233,12 @@ static inline void assert_pte_locked(struct mm_struct *mm, 
unsigned long addr)
 #ifdef CONFIG_PPC_RADIX_MMU
 static inline bool radix_enabled(void)
 {
-   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+   if (current_mmu_mode == MMU_MODE_RADIX)
+   return true;
+   else if (current_mmu_mode != MMU_MODE_NONE)
+   return false;
+   else
+   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
 }
 
 static inline bool early_radix_enabled(void)
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index 2694d07..4ecc184 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -77,6 +77,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef CONFIG_PPC_BOOK3S
+   VMCOREINFO_SYMBOL(current_mmu_mode);
+#endif
VMCOREINFO_SYMBOL(vmemmap_list);
VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
VMCOREINFO_SYMBOL(mmu_psize_defs);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556..a566a95 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -886,6 +886,8 @@ void __init hash__early_init_devtree(void)
 
 void __init hash__early_init_mmu(void)
 {
+   current_mmu_mode = MMU_MODE_HASH;
+
htab_init_page_sizes();
 
/*
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index af897d9..4b0ad48 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -298,6 +298,8 @@ void __init radix__early_init_mmu(void)
 {
unsigned long lpcr;
 
+   current_mmu_mode = MMU_MODE_RADIX;
+
 #ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 0b6fb24..4638a00 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -31,6 +31,12 @@
 #include 
 #include 
 
+/*
+ * current MMU mode
+ */
+unsigned int current_mmu_mode __read_mostly = MMU_MODE_NONE;
+EXPORT_SYMBOL(current_mmu_mode);
+
 static inline int is_exec_fault(void)
 {
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 702d768..0b55425 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -170,4 +170,5 @@ EXPORT_SYMBOL(flush_tlb_range);
 
 void __init early_init_mmu(void)
 {
+   current_mmu_mode = MMU_MODE_HASH32;
 }
diff --git a/arch/powerpc/mm/tlb_nohash.

[RESEND PATCH v2] powerpc/mm: export current mmu mode info

2016-09-22 Thread Hari Bathini
The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know the current MMU mode the kernel is using,
to debug/analyze it.  The current MMU mode depends on hardware support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know the MMU mode
early in their init process, when they may not have access to offset info
of structure members. A hard-coded offset may help but it won't be robust.

This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process, helping tools to initialize accurately for each MMU mode.
This patch also optimizes the radix_enabled() function call.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
* Patch name changed from "ppc64/book3s: export mmu type info"
* Optimized radix_enabled() function
* Removed current_mmu_mode vmcoreinfo from under flags

 arch/powerpc/include/asm/mmu.h  |   22 +-
 arch/powerpc/kernel/machine_kexec.c |1 +
 arch/powerpc/mm/hash_utils_64.c |2 ++
 arch/powerpc/mm/pgtable-radix.c |2 ++
 arch/powerpc/mm/pgtable.c   |6 ++
 arch/powerpc/mm/tlb_hash32.c|1 +
 arch/powerpc/mm/tlb_nohash.c|2 ++
 7 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408..558987c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -199,6 +199,21 @@ static inline void mmu_clear_feature(unsigned long feature)
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
+/*
+ * Possible MMU modes
+ */
+#define MMU_MODE_NONE   0
+#define MMU_MODE_RADIX  1
+#define MMU_MODE_HASH   2
+#define MMU_MODE_HASH32 3
+#define MMU_MODE_NOHASH 4
+#define MMU_MODE_NOHASH32   5
+
+/*
+ * current MMU mode
+ */
+extern unsigned int current_mmu_mode __read_mostly;
+
 #ifdef CONFIG_PPC64
 /* This is our real memory area size on ppc64 server, on embedded, we
  * make it match the size our of bolted TLB area
@@ -218,7 +233,12 @@ static inline void assert_pte_locked(struct mm_struct *mm, 
unsigned long addr)
 #ifdef CONFIG_PPC_RADIX_MMU
 static inline bool radix_enabled(void)
 {
-   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+   if (current_mmu_mode == MMU_MODE_RADIX)
+   return true;
+   else if (current_mmu_mode != MMU_MODE_NONE)
+   return false;
+   else
+   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
 }
 
 static inline bool early_radix_enabled(void)
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index 2694d07..2a32694 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -87,6 +87,7 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
VMCOREINFO_OFFSET(mmu_psize_def, shift);
 #endif
+   VMCOREINFO_SYMBOL(current_mmu_mode);
 }
 
 /*
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556..a566a95 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -886,6 +886,8 @@ void __init hash__early_init_devtree(void)
 
 void __init hash__early_init_mmu(void)
 {
+   current_mmu_mode = MMU_MODE_HASH;
+
htab_init_page_sizes();
 
/*
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index af897d9..4b0ad48 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -298,6 +298,8 @@ void __init radix__early_init_mmu(void)
 {
unsigned long lpcr;
 
+   current_mmu_mode = MMU_MODE_RADIX;
+
 #ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 0b6fb24..4638a00 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -31,6 +31,12 @@
 #include 
 #include 
 
+/*
+ * current MMU mode
+ */
+unsigned int current_mmu_mode __read_mostly = MMU_MODE_NONE;
+EXPORT_SYMBOL(current_mmu_mode);
+
 static inline int is_exec_fault(void)
 {
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 702d768..0b55425 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -170,4 +170,5 @@ EXPORT_SYMBOL(flush_tlb_range);
 
 void __init early_init_mmu(void)
 {
+   current_mmu_mode = MMU_MODE_HASH32;
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 050badc..74300a7 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm

Re: [PATCH v2] powerpc/mm: export current mmu mode info

2016-09-22 Thread Hari Bathini

Hi Aneesh,


On Thursday 22 September 2016 09:54 PM, Aneesh Kumar K.V wrote:

Hari Bathini <hbath...@linux.vnet.ibm.com> writes:


The kernel now supports both radix and hash MMU modes. Tools like crash
and makedumpfile need to know the current MMU mode the kernel is using,
to debug/analyze it.  The current MMU mode depends on hardware support
and also whether disable_radix cmdline parameter is passed to the kernel.
The mmu_features member of cpu_spec structure holds the current MMU mode
a cpu is using. But the above mentioned tools need to know the MMU mode
early in their init process, when they may not have access to offset info
of structure members. A hard-coded offset may help but it won't be robust.

IIUC, you walk the linux page table and that should be more or less same


Taking the case of crash tool, vmemmap start value is currently
hard-coded to 0xf000000000000000UL but it changed to
0xc00a000000000000UL in case of radix.


between radix/hash right except few bits. Now what crash will be
interested in will be the RPN part of the table which should be same
between hash/radix.


Though the walk is pretty much the same, the tool still needs to know
the right index values and vmemmap start to use, as they are different
for radix and hash..


This patch introduces a new global variable, which holds the current MMU
mode the kernel is running in and can be accessed by tools early in their
init process,

Init process of what ? kernel or crash tool ?


tool initialization - crash or makedumpfile..


helping tools to initialize accurately for each MMU mode.
This patch also optimizes the radix_enabled() function call.


how do you differentiate between the old linux page table format and
the new ? Can you also summarize what crash tool look for in the page
table ?


It needs the index sizes, masked bit values and page flag info to
do the page table walk. Since they can be different for hash and
radix..


Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
* Patch name changed from "ppc64/book3s: export mmu type info"
* Optimized radix_enabled() function


  arch/powerpc/include/asm/mmu.h  |   22 +-
  arch/powerpc/kernel/machine_kexec.c |3 +++
  arch/powerpc/mm/hash_utils_64.c |2 ++
  arch/powerpc/mm/pgtable-radix.c |2 ++
  arch/powerpc/mm/pgtable.c   |6 ++
  arch/powerpc/mm/tlb_hash32.c|1 +
  arch/powerpc/mm/tlb_nohash.c|2 ++
  7 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408..558987c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -199,6 +199,21 @@ static inline void mmu_clear_feature(unsigned long feature)

  extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;

+/*
+ * Possible MMU modes
+ */
+#define MMU_MODE_NONE   0
+#define MMU_MODE_RADIX  1
+#define MMU_MODE_HASH   2
+#define MMU_MODE_HASH32 3
+#define MMU_MODE_NOHASH 4
+#define MMU_MODE_NOHASH32   5
+
+/*
+ * current MMU mode
+ */
+extern unsigned int current_mmu_mode __read_mostly;
+
  #ifdef CONFIG_PPC64
  /* This is our real memory area size on ppc64 server, on embedded, we
   * make it match the size our of bolted TLB area
@@ -218,7 +233,12 @@ static inline void assert_pte_locked(struct mm_struct *mm, 
unsigned long addr)
  #ifdef CONFIG_PPC_RADIX_MMU
  static inline bool radix_enabled(void)
  {
-   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+   if (current_mmu_mode == MMU_MODE_RADIX)
+   return true;
+   else if (current_mmu_mode != MMU_MODE_NONE)
+   return false;
+   else
+   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
  }


That is not optimization, that makes it slow. We hotpatch mmu_has_feature().


Ugh! I didn't consider that..

Thanks
Hari


  static inline bool early_radix_enabled(void)
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index 2694d07..4ecc184 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -77,6 +77,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(contig_page_data);
  #endif
  #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef CONFIG_PPC_BOOK3S
+   VMCOREINFO_SYMBOL(current_mmu_mode);
+#endif
VMCOREINFO_SYMBOL(vmemmap_list);
VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
VMCOREINFO_SYMBOL(mmu_psize_defs);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556..a566a95 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -886,6 +886,8 @@ void __init hash__early_init_devtree(void)

  void __init hash__early_init_mmu(void)
  {
+   current_mmu_mode = MMU_MODE_HASH;
+
htab_init_page_sizes();

/*
diff --git a/arch/pow

Re: [PATCH v3 2/2] powerpc/fadump: parse fadump reserve memory size based on memory range

2016-08-25 Thread Hari Bathini



On Thursday 25 August 2016 12:31 PM, Dave Young wrote:

On 08/10/16 at 03:35pm, Hari Bathini wrote:

When fadump is enabled, by default 5% of system RAM is reserved for
fadump kernel. While that works for most cases, it is not good enough
for every case.

Currently, to override the default value, fadump supports specifying
memory to reserve with fadump_reserve_mem=size, where only a fixed size
can be specified. This patch adds support to specify memory size to
reserve for different memory ranges as below:

fadump_reserve_mem=<range1>:<size1>[,<range2>:<size2>,...]

Hi, Hari


Hi Dave,


I do not understand why you need to introduce the new cmdline param, what's
the difference between the "fadump reserved" memory and the memory


I am not introducing a new parameter but adding a new syntax for
an existing parameter.


reserved by "crashkernel="? Can fadump just use crashkernel= to reserve
memory?


Not all syntaxes supported by crashkernel apply for fadump_reserve_mem.
Nonetheless, it is worth considering reuse of crashkernel parameter instead
of fadump_reserve_mem. Let me see what I can do about this..

Thanks
Hari


Thanks
Dave


Supporting range based input for "fadump_reserve_mem" parameter helps
using the same commandline parameter for different system memory sizes.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mah...@linux.vnet.ibm.com>
---

Changes from v2:
1. Updated changelog


  arch/powerpc/kernel/fadump.c |   63 --
  1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b3a6633..7c01b5b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -193,6 +193,55 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
return addr;
  }
  
+/*

+ * This function parses command line for fadump_reserve_mem=
+ *
+ * Supports the below two syntaxes:
+ *1. fadump_reserve_mem=size
+ *2. fadump_reserve_mem=ramsize-range:size[,...]
+ *
+ * Sets fw_dump.reserve_bootvar with the memory size
+ * provided, 0 otherwise
+ *
+ * The function returns -EINVAL on failure, 0 otherwise.
+ */
+static int __init parse_fadump_reserve_mem(void)
+{
+   char *name = "fadump_reserve_mem=";
+   char *fadump_cmdline = NULL, *cur;
+
+   fw_dump.reserve_bootvar = 0;
+
+   /* find fadump_reserve_mem and use the last one if there are many */
+   cur = strstr(boot_command_line, name);
+   while (cur) {
+   fadump_cmdline = cur;
+   cur = strstr(cur+1, name);
+   }
+
+   /* when no fadump_reserve_mem= cmdline option is provided */
+   if (!fadump_cmdline)
+   return 0;
+
+   fadump_cmdline += strlen(name);
+
+   /* for fadump_reserve_mem=size cmdline syntax */
+   if (!is_colon_in_param(fadump_cmdline)) {
+   fw_dump.reserve_bootvar = memparse(fadump_cmdline, NULL);
+   return 0;
+   }
+
+   /* for fadump_reserve_mem=ramsize-range:size[,...] cmdline syntax */
+   cur = fadump_cmdline;
+   fw_dump.reserve_bootvar = parse_mem_range_size("fadump_reserve_mem",
					&cur, memblock_phys_mem_size());
+   if (cur == fadump_cmdline) {
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
  /**
   * fadump_calculate_reserve_size(): reserve variable boot area 5% of System 
RAM
   *
@@ -212,12 +261,17 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
  {
unsigned long size;
  
+	/* sets fw_dump.reserve_bootvar */

+   parse_fadump_reserve_mem();
+
/*
 * Check if the size is specified through fadump_reserve_mem= cmdline
 * option. If yes, then use that.
 */
if (fw_dump.reserve_bootvar)
return fw_dump.reserve_bootvar;
+   else
+   printk(KERN_INFO "fadump: calculating default boot size\n");
  
  	/* divide by 20 to get 5% of value */

size = memblock_end_of_DRAM() / 20;
@@ -348,15 +402,6 @@ static int __init early_fadump_param(char *p)
  }
  early_param("fadump", early_fadump_param);
  
-/* Look for fadump_reserve_mem= cmdline option */

-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
  static void register_fw_dump(struct fadump_mem_struct *fdm)
  {
int rc;







[PATCH 3/3] powerpc/fadump: update documentation about crashkernel parameter reuse

2016-11-10 Thread Hari Bathini
As we are reusing crashkernel parameter instead of fadump_reserve_mem
parameter to specify the memory to reserve for fadump's crash kernel,
update the documentation accordingly.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |   23 ++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt 
b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..8394bc8 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -55,10 +55,14 @@ as follows:
  booted with restricted memory. By default, the boot memory
  size will be the larger of 5% of system RAM or 256MB.
  Alternatively, user can also specify boot memory size
- through boot parameter 'fadump_reserve_mem=' which will
- override the default calculated size. Use this option
- if default boot memory size is not sufficient for second
- kernel to boot successfully.
+ through boot parameter 'crashkernel=' which will override
+ the default calculated size. Use this option if default
+ boot memory size is not sufficient for second kernel to
+ boot successfully. For syntax of crashkernel= parameter,
+ refer to Documentation/kdump/kdump.txt. If any offset is
+ provided in crashkernel= parameter, it will be ignored
+ as fadump reserves memory at end of RAM for boot memory
+ dump preservation in case of a crash.
 
 -- After the low memory (boot memory) area has been saved, the
firmware will reset PCI and other hardware state.  It will
@@ -158,13 +162,16 @@ How to enable firmware-assisted dump (fadump):
 
 1. Set config option CONFIG_FA_DUMP=y and build kernel.
 2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
-3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
 
-NOTE: If firmware-assisted dump fails to reserve memory then it will
-   fallback to existing kdump mechanism if 'crashkernel=' option
-   is set at kernel cmdline.
+NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ use 'crashkernel=' to specify size of the memory to reserve
+ for boot memory dump preservation.
+  2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
 
 Sysfs/debugfs files:
 



[PATCH 1/3] crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE

2016-11-10 Thread Hari Bathini
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. While here,
remove the multiple definitions of append_elf_note() and final_note()
in favor of the ones defined under CONFIG_CRASH_CORE. There is no functional
change with this patch.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/Kconfig   |4 
 arch/ia64/kernel/crash.c   |   22 --
 arch/powerpc/Kconfig   |   10 -
 arch/powerpc/include/asm/fadump.h  |2 
 arch/powerpc/kernel/crash.c|2 
 arch/powerpc/kernel/fadump.c   |   34 ---
 arch/powerpc/kernel/setup-common.c |5 
 include/linux/crash_core.h |   75 ++
 include/linux/kexec.h  |   63 -
 kernel/Makefile|1 
 kernel/crash_core.c|  450 
 kernel/kexec_core.c|  435 ---
 12 files changed, 550 insertions(+), 553 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c

diff --git a/arch/Kconfig b/arch/Kconfig
index 659bdd0..4ad34b9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,7 +2,11 @@
 # General architecture dependent options
 #
 
+config CRASH_CORE
+   bool
+
 config KEXEC_CORE
+   select CRASH_CORE
bool
 
 config OPROFILE
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 2955f35..75859a0 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -27,28 +27,6 @@ static int kdump_freeze_monarch;
 static int kdump_on_init = 1;
 static int kdump_on_fatal_mca = 1;
 
-static inline Elf64_Word
-*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
-   size_t data_len)
-{
-   struct elf_note *note = (struct elf_note *)buf;
-   note->n_namesz = strlen(name) + 1;
-   note->n_descsz = data_len;
-   note->n_type   = type;
-   buf += (sizeof(*note) + 3)/4;
-   memcpy(buf, name, note->n_namesz);
-   buf += (note->n_namesz + 3)/4;
-   memcpy(buf, data, data_len);
-   buf += (data_len + 3)/4;
-   return buf;
-}
-
-static void
-final_note(void *buf)
-{
-   memset(buf, 0, sizeof(struct elf_note));
-}
-
 extern void ia64_dump_cpu_regs(void *);
 
 static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..644703f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -479,21 +479,23 @@ config RELOCATABLE
  load address of the kernel (eg. u-boot/mkimage).
 
 config CRASH_DUMP
-   bool "Build a kdump crash kernel"
+   bool "Build a dump capture kernel"
depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE
help
- Build a kernel suitable for use as a kdump capture kernel.
+ Build a kernel suitable for use as a dump capture kernel.
  The same kernel binary can be used as production kernel and dump
  capture kernel.
 
 config FA_DUMP
bool "Firmware-assisted dump"
-   depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+   depends on PPC64 && PPC_RTAS
+   select CRASH_CORE
+   select CRASH_DUMP
help
  A robust mechanism to get reliable kernel crash dump with
  assistance from firmware. This approach does not use kexec,
- instead firmware assists in booting the kdump kernel
+ instead firmware assists in booting the capture kernel
  while preserving memory contents. Firmware-assisted dump
  is meant to be a kdump replacement offering robustness and
  speed not possible without system firmware assistance.
diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 0031806..60b9108 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -73,6 +73,8 @@
reg_entry++;\
 })
 
+extern int crashing_cpu;
+
 /* Kernel Dump section info */
 struct fadump_section {
__be32  request_flag;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 47b63de..cbabb5a 100644
--- a/arch/powerpc/kernel/crash.c

[PATCH 2/3] powerpc/fadump: reuse crashkernel parameter for fadump memory reservation

2016-11-10 Thread Hari Bathini
fadump supports specifying memory to reserve for fadump's crash kernel
with fadump_reserve_mem kernel parameter. This parameter currently
supports passing a fixed memory size, like fadump_reserve_mem=<size> only.
This patch aims to add support for other syntaxes like range-based
memory size <range1>:<size1>[,<range2>:<size2>,<range3>:<size3>,...]
which allows using the same parameter to boot the kernel with different
system RAM sizes.

As crashkernel parameter already supports the above mentioned syntaxes,
this patch removes fadump_reserve_mem parameter and reuses crashkernel
parameter instead, to specify memory for fadump's crash kernel memory
reservation as well. If any offset is provided in crashkernel parameter,
it will be ignored in case of fadump, as fadump reserves memory at end
of RAM.

The advantages of using the crashkernel parameter instead of the
fadump_reserve_mem parameter are one less kernel parameter overall, code
reuse and support for multiple syntaxes to specify memory.
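
For illustration, a hedged example of the range-based syntax this buys us
(semantics as documented in Documentation/kdump/kdump.txt; the sizes below
are made up):

    crashkernel=4G-64G:512M,64G-:1G

Booted with fadump=on, this reserves 512M for the boot memory area on
systems with 4G to 64G of RAM and 1G above 64G; any @offset suffix would be
ignored since fadump reserves memory at the end of RAM.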

Suggested-by: Dave Young <dyo...@redhat.com>
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   23 ++-
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index db0b339..de7d39a 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -210,14 +210,20 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
  */
 static inline unsigned long fadump_calculate_reserve_size(void)
 {
-   unsigned long size;
+   int ret;
+   unsigned long long base, size;
 
/*
-* Check if the size is specified through fadump_reserve_mem= cmdline
-* option. If yes, then use that.
+* Check if the size is specified through crashkernel= cmdline
+* option. If yes, then use that but ignore base as fadump
+* reserves memory at end of RAM.
 */
-   if (fw_dump.reserve_bootvar)
+   ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+   , );
+   if (ret == 0 && size > 0) {
+   fw_dump.reserve_bootvar = (unsigned long)size;
return fw_dump.reserve_bootvar;
+   }
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -353,15 +359,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, &p);
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;



[PATCH 0/3] kexec/fadump: remove dependency with CONFIG_KEXEC and reuse crashkernel parameter for fadump

2016-11-10 Thread Hari Bathini
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

This patchset removes dependency with CONFIG_KEXEC for crashkernel parameter
and vmcoreinfo related code as it can be reused without kexec support. Also,
crashkernel parameter is reused instead of fadump_reserve_mem to reserve
memory for fadump.

The first patch moves crashkernel parameter parsing and vmcoreinfo related
code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE to reuse without
depending on it. The second patch reuses crashkernel for reserving memory
for fadump instead of fadump_reserve_mem. This has the advantage of using
all the syntaxes crashkernel supports, for fadump as well. The third patch
updates fadump kernel documentation about use of crashkernel parameter.

---

Hari Bathini (3):
  crash: move crashkernel parsing and vmcore related code under 
CONFIG_CRASH_CORE
  powerpc/fadump: reuse crashkernel parameter for fadump memory reservation
  powerpc/fadump: update documentation about crashkernel parameter reuse


 Documentation/powerpc/firmware-assisted-dump.txt |   23 +
 arch/Kconfig |4 
 arch/ia64/kernel/crash.c |   22 -
 arch/powerpc/Kconfig |   10 
 arch/powerpc/include/asm/fadump.h|2 
 arch/powerpc/kernel/crash.c  |2 
 arch/powerpc/kernel/fadump.c |   57 +--
 arch/powerpc/kernel/setup-common.c   |5 
 include/linux/crash_core.h   |   75 
 include/linux/kexec.h|   63 ---
 kernel/Makefile  |1 
 kernel/crash_core.c  |  450 ++
 kernel/kexec_core.c  |  435 -
 13 files changed, 575 insertions(+), 574 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c



Re: [PATCH 1/3] crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE

2016-11-14 Thread Hari Bathini



On Monday 14 November 2016 11:06 AM, Baoquan He wrote:

On 11/10/16 at 05:27pm, Hari Bathini wrote:

Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. While here,
remove the multiple definitions of append_elf_note() and final_note()
in favor of the ones defined under CONFIG_CRASH_CORE. There is no functional
change with this patch.

Can't think of a reason to object.

Could you do the moving from kexec_core.c to crash_core.c only, and
then do the arch-specific cleanup in another patch?


Right. Will move arch specific code into a separate patch, on the
next version..


Besides there's already a file crash_dump.h, can we reuse that?


Did think about it. But as it is meant for the dump capture kernel
(CONFIG_CRASH_DUMP), and CONFIG_CRASH_DUMP and CONFIG_KEXEC_CORE are
independent, I didn't pursue it..

Thanks
Hari



[PATCH v2 0/5] kexec/fadump: remove dependency with CONFIG_KEXEC and reuse crashkernel parameter for fadump

2016-11-25 Thread Hari Bathini
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

This patchset removes dependency with CONFIG_KEXEC for crashkernel parameter
and vmcoreinfo related code as it can be reused without kexec support. Also,
crashkernel parameter is reused instead of fadump_reserve_mem to reserve
memory for fadump.

The first patch moves crashkernel parameter parsing and vmcoreinfo related
code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The second patch
reuses the definitions of append_elf_note() & final_note() functions under
CONFIG_CRASH_CORE in IA64 arch code. The third patch removes dependency on
CONFIG_KEXEC for firmware-assisted dump (fadump) in powerpc. The next patch
reuses crashkernel parameter for reserving memory for fadump, instead of the
fadump_reserve_mem parameter. This has the advantage of using all syntaxes
crashkernel parameter supports, for fadump as well. The last patch updates
fadump kernel documentation about use of crashkernel parameter.


Changes from v1:
* Moved arch-specific code to separate patches. Introduced one patch for the
  IA64 arch and another patch for powerpc in the process.

---

Hari Bathini (5):
  crash: move crashkernel parsing and vmcore related code under 
CONFIG_CRASH_CORE
  ia64: reuse append_elf_note() and final_note() functions
  powerpc/fadump: remove dependency with CONFIG_KEXEC
  powerpc/fadump: reuse crashkernel parameter for fadump memory reservation
  powerpc/fadump: update documentation about crashkernel parameter reuse


 Documentation/powerpc/firmware-assisted-dump.txt |   23 +
 arch/Kconfig |4 
 arch/ia64/kernel/crash.c |   22 -
 arch/powerpc/Kconfig |   10 
 arch/powerpc/include/asm/fadump.h|2 
 arch/powerpc/kernel/crash.c  |2 
 arch/powerpc/kernel/fadump.c |   57 +--
 arch/powerpc/kernel/setup-common.c   |5 
 include/linux/crash_core.h   |   75 
 include/linux/kexec.h|   63 ---
 kernel/Makefile  |1 
 kernel/crash_core.c  |  450 ++
 kernel/kexec_core.c  |  435 -
 13 files changed, 575 insertions(+), 574 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c



[PATCH v2 3/5] powerpc/fadump: remove dependency with CONFIG_KEXEC

2016-11-25 Thread Hari Bathini
Now that crashkernel parameter parsing and vmcoreinfo related code is
moved under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE, remove
dependency with CONFIG_KEXEC for CONFIG_FA_DUMP. While here, get rid
of definitions of fadump_append_elf_note() & fadump_final_note()
functions to reuse similar functions compiled under CONFIG_CRASH_CORE.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig   |   10 ++
 arch/powerpc/include/asm/fadump.h  |2 ++
 arch/powerpc/kernel/crash.c|2 --
 arch/powerpc/kernel/fadump.c   |   34 +++---
 arch/powerpc/kernel/setup-common.c |5 +
 5 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..644703f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -479,21 +479,23 @@ config RELOCATABLE
  load address of the kernel (eg. u-boot/mkimage).
 
 config CRASH_DUMP
-   bool "Build a kdump crash kernel"
+   bool "Build a dump capture kernel"
depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE
help
- Build a kernel suitable for use as a kdump capture kernel.
+ Build a kernel suitable for use as a dump capture kernel.
  The same kernel binary can be used as production kernel and dump
  capture kernel.
 
 config FA_DUMP
bool "Firmware-assisted dump"
-   depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+   depends on PPC64 && PPC_RTAS
+   select CRASH_CORE
+   select CRASH_DUMP
help
  A robust mechanism to get reliable kernel crash dump with
  assistance from firmware. This approach does not use kexec,
- instead firmware assists in booting the kdump kernel
+ instead firmware assists in booting the capture kernel
  while preserving memory contents. Firmware-assisted dump
  is meant to be a kdump replacement offering robustness and
  speed not possible without system firmware assistance.
diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 0031806..60b9108 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -73,6 +73,8 @@
reg_entry++;\
 })
 
+extern int crashing_cpu;
+
 /* Kernel Dump section info */
 struct fadump_section {
__be32  request_flag;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 47b63de..cbabb5a 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -43,8 +43,6 @@
 #define IPI_TIMEOUT1
 #define REAL_MODE_TIMEOUT  1
 
-/* This keeps a track of which one is the crashing cpu. */
-int crashing_cpu = -1;
 static int time_to_dump;
 
 #define CRASH_HANDLER_MAX 3
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5..db0b339 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -486,34 +486,6 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, 
struct pt_regs *regs)
return reg_entry;
 }
 
-static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
-   void *data, size_t data_len)
-{
-   struct elf_note note;
-
-   note.n_namesz = strlen(name) + 1;
-   note.n_descsz = data_len;
-   note.n_type   = type;
-   memcpy(buf, &note, sizeof(note));
-   buf += (sizeof(note) + 3)/4;
-   memcpy(buf, name, note.n_namesz);
-   buf += (note.n_namesz + 3)/4;
-   memcpy(buf, data, note.n_descsz);
-   buf += (note.n_descsz + 3)/4;
-
-   return buf;
-}
-
-static void fadump_final_note(u32 *buf)
-{
-   struct elf_note note;
-
-   note.n_namesz = 0;
-   note.n_descsz = 0;
-   note.n_type   = 0;
-   memcpy(buf, &note, sizeof(note));
-}
-
 static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
 {
struct elf_prstatus prstatus;
@@ -524,8 +496,8 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct 
pt_regs *regs)
 * prstatus.pr_pid = 
 */
   elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
-   buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
-   &prstatus, sizeof(prstatus));
+   buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
return buf;
 }
 
@@ -666,7 +638,7 @@ static int __init fadump_build_cpu_notes(const struct 
fadump_mem_struct *fdm)
   note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
}
}
-   fadump_final_note(note_buf);
+   final_note(note_buf);
 
if (fdh) {
pr_debug(&

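For readers following the note-buffer consolidation above, here is a minimal
standalone sketch (userspace demo code, not part of the kernel or of this
patch) of the ELF note layout that the shared append_elf_note()/final_note()
helpers produce: a 3-word header (namesz, descsz, type), then the name and
descriptor each padded to a 4-byte boundary, terminated by an all-zero note.
It mirrors the fadump_append_elf_note()/fadump_final_note() definitions
removed by this patch; the demo function name and the fake register payload
are illustrative only.

    /*
     * Standalone illustration (userspace, not kernel code) of the ELF note
     * layout built by append_elf_note()/final_note(): a 3-word header
     * (n_namesz, n_descsz, n_type), then name and descriptor, each padded
     * to a 4-byte boundary, terminated by an all-zero note header.
     */
    #include <elf.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the logic of the removed fadump_append_elf_note() above. */
    static uint32_t *demo_append_elf_note(uint32_t *buf, const char *name,
                                          uint32_t type, const void *data,
                                          size_t data_len)
    {
            Elf64_Nhdr note;

            note.n_namesz = strlen(name) + 1;
            note.n_descsz = data_len;
            note.n_type   = type;
            memcpy(buf, &note, sizeof(note));
            buf += (sizeof(note) + 3) / 4;   /* 12-byte header = 3 words  */
            memcpy(buf, name, note.n_namesz);
            buf += (note.n_namesz + 3) / 4;  /* name, rounded up to words */
            memcpy(buf, data, note.n_descsz);
            buf += (note.n_descsz + 3) / 4;  /* desc, rounded up to words */
            return buf;
    }

    int main(void)
    {
            uint32_t buf[64] = { 0 };
            uint64_t fake_regs[2] = { 0x1122334455667788ULL,
                                      0x99aabbccddeeff00ULL };
            uint32_t *p = buf;

            /* "CORE"/NT_PRSTATUS is what the fadump code emits per CPU. */
            p = demo_append_elf_note(p, "CORE", NT_PRSTATUS,
                                     fake_regs, sizeof(fake_regs));
            /* final_note(): an all-zero header terminates the note list. */
            memset(p, 0, 3 * sizeof(uint32_t));

            printf("note buffer occupies %zu bytes\n",
                   (size_t)(p + 3 - buf) * sizeof(uint32_t));
            return 0;
    }

With this toy 16-byte payload the program reports 48 bytes: 12 for the
header, 8 for the padded "CORE" name, 16 for the descriptor, and 12 for the
terminating zero note.
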
[PATCH v2 5/5] powerpc/fadump: update documentation about crashkernel parameter reuse

2016-11-25 Thread Hari Bathini
As we are reusing the crashkernel parameter instead of the
fadump_reserve_mem parameter to specify the memory to reserve for
fadump's crash kernel, update the documentation accordingly.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |   23 ++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt 
b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..8394bc8 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -55,10 +55,14 @@ as follows:
  booted with restricted memory. By default, the boot memory
  size will be the larger of 5% of system RAM or 256MB.
  Alternatively, user can also specify boot memory size
- through boot parameter 'fadump_reserve_mem=' which will
- override the default calculated size. Use this option
- if default boot memory size is not sufficient for second
- kernel to boot successfully.
+ through boot parameter 'crashkernel=' which will override
+ the default calculated size. Use this option if default
+ boot memory size is not sufficient for second kernel to
+ boot successfully. For syntax of crashkernel= parameter,
+ refer to Documentation/kdump/kdump.txt. If any offset is
+ provided in crashkernel= parameter, it will be ignored
+ as fadump reserves memory at end of RAM for boot memory
+ dump preservation in case of a crash.
 
 -- After the low memory (boot memory) area has been saved, the
firmware will reset PCI and other hardware state.  It will
@@ -158,13 +162,16 @@ How to enable firmware-assisted dump (fadump):
 
 1. Set config option CONFIG_FA_DUMP=y and build kernel.
 2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
-3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
 
-NOTE: If firmware-assisted dump fails to reserve memory then it will
-   fallback to existing kdump mechanism if 'crashkernel=' option
-   is set at kernel cmdline.
+NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ use 'crashkernel=' to specify size of the memory to reserve
+ for boot memory dump preservation.
+  2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
 
 Sysfs/debugfs files:
 

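Illustrative examples (not part of the patch): with this change a fadump
kernel command line uses the same crashkernel= syntax documented in
Documentation/kdump/kdump.txt, either a fixed size or range:size pairs keyed
on total system RAM; any @offset is ignored, since fadump always reserves
memory at the end of RAM. For example:

    fadump=on crashkernel=1024M
    fadump=on crashkernel=4G-64G:1024M,64G-:2048M

In the second form, a machine with more than 64G of RAM would reserve 2048M
for boot memory dump preservation.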


[PATCH v2 1/5] crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE

2016-11-25 Thread Hari Bathini
Traditionally, kdump is used to save a vmcore in case of a crash. Some
architectures, like powerpc, can save the vmcore using architecture-specific
support instead of the kexec/kdump mechanism. Such architecture-specific
support also needs to reserve memory, to be used by the dump capture kernel.
The crashkernel parameter can be reused for that memory reservation by such
architecture-specific infrastructure.

But currently, the code related to vmcoreinfo and to parsing of the
crashkernel parameter is built under CONFIG_KEXEC_CORE. This patch
introduces CONFIG_CRASH_CORE and moves the above-mentioned code under
this config, allowing code reuse without a dependency on CONFIG_KEXEC.
There is no functional change with this patch.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/Kconfig   |4 
 include/linux/crash_core.h |   71 +++
 include/linux/kexec.h  |   63 --
 kernel/Makefile|1 
 kernel/crash_core.c|  450 
 kernel/kexec_core.c|  407 
 6 files changed, 530 insertions(+), 466 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c

diff --git a/arch/Kconfig b/arch/Kconfig
index 659bdd0..4ad34b9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,7 +2,11 @@
 # General architecture dependent options
 #
 
+config CRASH_CORE
+   bool
+
 config KEXEC_CORE
+   select CRASH_CORE
bool
 
 config OPROFILE
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
new file mode 100644
index 000..9a4f4b0
--- /dev/null
+++ b/include/linux/crash_core.h
@@ -0,0 +1,71 @@
+#ifndef LINUX_CRASH_CORE_H
+#define LINUX_CRASH_CORE_H
+
+#include 
+#include 
+#include 
+
+#define CRASH_CORE_NOTE_NAME  "CORE"
+#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
+#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
+#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+
+#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+CRASH_CORE_NOTE_NAME_BYTES +   \
+CRASH_CORE_NOTE_DESC_BYTES)
+
+#define VMCOREINFO_BYTES  (4096)
+#define VMCOREINFO_NOTE_NAME  "VMCOREINFO"
+#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
+#define VMCOREINFO_NOTE_SIZE  ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+VMCOREINFO_NOTE_NAME_BYTES +   \
+VMCOREINFO_BYTES)
+
+typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+
+void crash_save_vmcoreinfo(void);
+void arch_crash_save_vmcoreinfo(void);
+__printf(1, 2)
+void vmcoreinfo_append_str(const char *fmt, ...);
+phys_addr_t paddr_vmcoreinfo_note(void);
+
+#define VMCOREINFO_OSRELEASE(value) \
+   vmcoreinfo_append_str("OSRELEASE=%s\n", value)
+#define VMCOREINFO_PAGESIZE(value) \
+   vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+   vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SIZE(name) \
+   vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(name))
+#define VMCOREINFO_STRUCT_SIZE(name) \
+   vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+   vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_LENGTH(name, value) \
+   vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
+#define VMCOREINFO_NUMBER(name) \
+   vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
+#define VMCOREINFO_CONFIG(name) \
+   vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PAGE_OFFSET(value) \
+   vmcoreinfo_append_str("PAGE_OFFSET=%lx\n", (unsigned long)value)
+#define VMCOREINFO_VMALLOC_START(value) \
+   vmcoreinfo_append_str("VMALLOC_START=%lx\n", (unsigned long)value)
+#define VMCOREINFO_VMEMMAP_START(value) \
+   vmcoreinfo_append_str("VMEMMAP_START=%lx\n", (unsigned long)value)
+
+extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+extern size_t vmcoreinfo_size;
+extern size_t vmcoreinfo_max_size;
+
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+   unsigned long long *crash_size, unsigned long long *crash_base);
+int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
+   unsigned long long *crash_size, unsigned long long *crash_base);
+int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
+   unsi
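
To illustrate the intended reuse (a hedged sketch, not the actual code of
patch 4/5): once parse_crashkernel() is available under CONFIG_CRASH_CORE,
an architecture-specific reservation path can query the crashkernel= size
roughly as below. The function name and the fallback policy (the larger of
5% of RAM or 256MB, per firmware-assisted-dump.txt) are assumptions for
illustration; boot_command_line and memblock_phys_mem_size() are the usual
early-boot facilities.

    /*
     * Hedged sketch only -- not the actual fadump patch. Shows how an
     * arch-specific dump path could reuse the crashkernel= parser that
     * CONFIG_CRASH_CORE now provides, without any dependency on kexec.
     */
    #include <linux/crash_core.h>
    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/memblock.h>

    static unsigned long long __init example_dump_reserve_size(void)
    {
            unsigned long long size = 0, base = 0;
            int ret;

            /* base (any @offset) is parsed but deliberately ignored here,
             * as fadump reserves memory at the end of RAM. */
            ret = parse_crashkernel(boot_command_line,
                                    memblock_phys_mem_size(),
                                    &size, &base);
            if (ret == 0 && size > 0)
                    return size;

            /* Fallback: the larger of 5% of system RAM or 256MB, matching
             * the default described in firmware-assisted-dump.txt. */
            return max_t(unsigned long long,
                         memblock_phys_mem_size() / 20, 256ULL << 20);
    }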
