[Crash-utility] [PATCH] kmem: update n option to dump memory block

2018-10-01 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

Update the "kmem -n" option to also dump memory blocks.
Currently, "kmem -n" shows memory sections only. This patch
makes memory block information available as well, provided the
'memory_block' structure and the 'memory_subsys' symbol exist.
The memory block information is useful for investigating memory
hot-plug issues.

Signed-off-by: Masayoshi Mizuma 
---
 defs.h   |   8 ++
 memory.c | 412 +--
 2 files changed, 379 insertions(+), 41 deletions(-)

diff --git a/defs.h b/defs.h
index 5b64bb7..f707c64 100644
--- a/defs.h
+++ b/defs.h
@@ -2049,6 +2049,14 @@ struct offset_table {/* stash of 
commonly-used offsets */
 long pci_bus_self;
long device_kobj;
long kobject_name;
+   long memory_block_dev;
+   long memory_block_start_section_nr;
+   long mem_section_pageblock_flags;
+   long memory_block_state;
+   long memory_block_nid;
+   long bus_type_p;
+   long device_private_device;
+   long device_private_knode_bus;
 };
 
 struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index ea25047..c7a4787 100644
--- a/memory.c
+++ b/memory.c
@@ -254,14 +254,16 @@ static void PG_reserved_flag_init(void);
 static void PG_slab_flag_init(void);
 static ulong nr_blockdev_pages(void);
 void sparse_mem_init(void);
-void dump_mem_sections(int);
+void dump_mem_block_and_sections(int);
+void _dump_mem_block_and_sections(void);
+void dump_mem_sections(void);
 void list_mem_sections(void);
 ulong sparse_decode_mem_map(ulong, ulong);
 char *read_mem_section(ulong);
 ulong nr_to_section(ulong);
 int valid_section(ulong);
 int section_has_mem_map(ulong);
-ulong section_mem_map_addr(ulong);
+ulong section_mem_map_addr(ulong, int);
 ulong valid_section_nr(ulong);
 ulong pfn_to_map(ulong);
 static int get_nodes_online(void);
@@ -5528,7 +5530,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
pc->curcmd_flags |= HEADER_PRINTED;
}
 
-   pp = section_mem_map_addr(section);
+   pp = section_mem_map_addr(section, 0);
pp = sparse_decode_mem_map(pp, section_nr);
phys = (physaddr_t) section_nr * PAGES_PER_SECTION() * 
PAGESIZE();
section_size = PAGES_PER_SECTION();
@@ -13389,7 +13391,7 @@ is_page_ptr(ulong addr, physaddr_t *phys)
nr_mem_sections = vt->max_mem_section_nr+1;
for (nr = 0; nr < nr_mem_sections ; nr++) {
if ((sec_addr = valid_section_nr(nr))) {
-   coded_mem_map = section_mem_map_addr(sec_addr);
+   coded_mem_map = section_mem_map_addr(sec_addr, 
0);
mem_map = sparse_decode_mem_map(coded_mem_map, 
nr);
end_mem_map = mem_map + (PAGES_PER_SECTION() * 
SIZE(page));
 
@@ -16355,7 +16357,7 @@ dump_memory_nodes(int initialize)
}
 
if (IS_SPARSEMEM())
-   dump_mem_sections(initialize);
+   dump_mem_block_and_sections(initialize);
 }
 
 /*
@@ -17140,7 +17142,7 @@ section_has_mem_map(ulong addr)
 }
 
 ulong 
-section_mem_map_addr(ulong addr)
+section_mem_map_addr(ulong addr, int raw)
 {   
char *mem_section;
ulong map;
@@ -17148,7 +17150,8 @@ section_mem_map_addr(ulong addr)
if ((mem_section = read_mem_section(addr))) {
map = ULONG(mem_section + 
OFFSET(mem_section_section_mem_map));
-   map &= SECTION_MAP_MASK;
+   if (!raw)
+   map &= SECTION_MAP_MASK;
return map;
}
return 0;
@@ -17179,7 +17182,7 @@ pfn_to_map(ulong pfn)
 
if (section_has_mem_map(section)) {
page_offset = pfn - section_nr_to_pfn(section_nr);
-   coded_mem_map = section_mem_map_addr(section);
+   coded_mem_map = section_mem_map_addr(section, 0);
mem_map = sparse_decode_mem_map(coded_mem_map, section_nr) +
(page_offset * SIZE(page));
return mem_map;
@@ -17188,16 +17191,365 @@ pfn_to_map(ulong pfn)
return 0;
 }
 
-void 
-dump_mem_sections(int initialize)
+struct memory_block_info {
+   ulong memory_block;
+   ulong start_sec;
+   ulong start_pfn;
+   ulong nid;
+   char state[24];
+   char name[32];
+};
+
+#define MIN_MEMORY_BLOCK_SIZE (1UL << _SECTION_SIZE_BITS)
+
+#define MEM_ONLINE  (1<<0)
+#define MEM_GOING_OFFLINE   (1<<1)
+#define MEM_OFFLINE (1<<2)
+#define MEM_GOING_ONLINE    (1<<3)
+#define MEM_CANCEL_ONLINE   (1<<4)
+#define MEM_CANCEL_OFFLINE  (1<<5)
+
+static void
+fill_memory_block_state(ulong memblock, char *buf)
 {
-   ulong nr, max, addr;
-   ulong nr_mem_sections;
+   ulong state;
+
+   memset(buf, 0, sizeof(*buf) * BUFSIZE);
+
+   

[Crash-utility] [PATCH 1/2] ppc64/opal: add a flag to determine if the kernel is running on OPAL firmware

2018-10-01 Thread Hari Bathini
Add PPC64 specific flag for kernels running on platforms based on
OPAL firmware. Use this flag before processing commands specific to
OPAL based systems.

Signed-off-by: Hari Bathini 
---
 defs.h  |8 
 ppc64.c |   36 
 2 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/defs.h b/defs.h
index 5b64bb7..567992e 100644
--- a/defs.h
+++ b/defs.h
@@ -5963,6 +5963,12 @@ struct ppc64_elf_prstatus {
 
 #ifdef PPC64
 
+struct ppc64_opal {
+   uint64_t base;
+   uint64_t entry;
+   uint64_t size;
+};
+
 struct ppc64_vmemmap {
 unsigned long phys;
 unsigned long virt;
@@ -6013,6 +6019,7 @@ struct machine_specific {
ulong _page_accessed;
int (*is_kvaddr)(ulong);
int (*is_vmaddr)(ulong);
+   struct ppc64_opal opal;
 };
 
 void ppc64_init(int);
@@ -6030,6 +6037,7 @@ void ppc64_dump_machdep_table(ulong);
  * in the kernel is also 0x40.
  */
 #define RADIX_MMU   (0x40)
+#define OPAL_FW (0x80)
 
 #define REGION_SHIFT   (60UL)
 #define REGION_ID(addr)(((unsigned long)(addr)) >> REGION_SHIFT)
diff --git a/ppc64.c b/ppc64.c
index ee2f76f..cf41765 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -241,6 +241,7 @@ struct machine_specific book3e_machine_specific = {
.is_vmaddr = book3e_is_vmaddr,
 };
 
+#define SKIBOOT_BASE   0x30000000
 
 /*
  *  Do all necessary machine-specific setup here.  This is called several
@@ -362,6 +363,16 @@ ppc64_init(int when)
struct machine_specific *m = machdep->machspec;
 
/*
+* To determine if the kernel was running on OPAL based 
platform,
+* use struct opal, which is populated with relevant 
values.
+*/
+   if (symbol_exists("opal")) {
+   get_symbol_data("opal", sizeof(struct 
ppc64_opal), &(m->opal));
+   if (m->opal.base == SKIBOOT_BASE)
+   machdep->flags |= OPAL_FW;
+   }
+
+   /*
 * On Power ISA 3.0 based server processors, a kernel 
can
 * run with radix MMU or standard MMU. Set the flag,
 * if it is radix MMU.
@@ -712,6 +723,8 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, "%sSWAP_ENTRY_L4", others++ ? "|" : "");
if (machdep->flags & RADIX_MMU)
fprintf(fp, "%sRADIX_MMU", others++ ? "|" : "");
+   if (machdep->flags & OPAL_FW)
+   fprintf(fp, "%sOPAL_FW", others++ ? "|" : "");
 fprintf(fp, ")\n");
 
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
@@ -2828,7 +2841,6 @@ ppc64_get_smp_cpus(void)
  */
 #define SKIBOOT_CONSOLE_DUMP_START 0x31000000
 #define SKIBOOT_CONSOLE_DUMP_SIZE  0x100000
-#define SKIBOOT_BASE   0x30000000
 #define ASCII_UNLIMITED ((ulong)(-1) >> 1)
 
 void
@@ -2841,10 +2853,6 @@ opalmsg(void)
uint64_t u64;
uint64_t limit64;
};
-   struct opal {
-   unsigned long long base;
-   unsigned long long entry;
-   } opal;
int i, a;
size_t typesz;
void *location;
@@ -2856,25 +2864,13 @@ opalmsg(void)
long count = SKIBOOT_CONSOLE_DUMP_SIZE;
ulonglong addr = SKIBOOT_CONSOLE_DUMP_START;
 
+   if (!(machdep->flags & OPAL_FW))
+   error(FATAL, "dump was not captured on OPAL based system");
+
if (CRASHDEBUG(4))
fprintf(fp, "<addr: %llx count: %ld (%s)>\n",
addr, count, "PHYSADDR");
 
-   /*
-* OPAL based platform check
-* struct opal of BSS section and hence default value will be ZERO(0)
-* opal_init() in the kernel initializes this structure based on
-* the platform. Use it as a key to determine whether the dump
-* was taken on an OPAL based system or not.
-*/
-   if (symbol_exists("opal")) {
-   get_symbol_data("opal", sizeof(struct opal), &opal);
-   if (opal.base != SKIBOOT_BASE)
-   error(FATAL, "dump was captured on non-PowerNV 
machine");
-   } else {
-   error(FATAL, "dump was captured on non-PowerNV machine");
-   }
-
BZERO(&mem, sizeof(struct memloc));
lost = typesz = per_line = 0;
location = NULL;

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility


[Crash-utility] [PATCH 2/2] ppc64/opal: Improve bt output when R1 falls in OPAL range

2018-10-01 Thread Hari Bathini
On OPAL based systems, when a thread is running an OPAL API, the stack
pointer and instruction pointer would be pointing at OPAL address but
'bt' output for such thread would complain that the stack pointer is
invalid. Update error/log message for better context.

Signed-off-by: Hari Bathini 
---
 ppc64.c |   36 
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/ppc64.c b/ppc64.c
index cf41765..041480b 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -65,8 +65,26 @@ static ulong hugepage_dir(ulong pte);
 static ulong pgd_page_vaddr_l4(ulong pgd);
 static ulong pud_page_vaddr_l4(ulong pud);
 static ulong pmd_page_vaddr_l4(ulong pmd);
+static int is_opal_context(ulong sp, ulong nip);
 void opalmsg(void);
 
+static int is_opal_context(ulong sp, ulong nip)
+{
+   uint64_t opal_start, opal_end;
+
+   if (!(machdep->flags & OPAL_FW))
+   return FALSE;
+
+   opal_start = machdep->machspec->opal.base;
+   opal_end   = opal_start + machdep->machspec->opal.size;
+
+   if (((sp >= opal_start) && (sp < opal_end)) ||
+   ((nip >= opal_start) && (nip < opal_end)))
+   return TRUE;
+
+   return FALSE;
+}
+
 static inline int is_hugepage(ulong pte)
 {
if ((machdep->flags & BOOK3E) ||
@@ -2270,7 +2288,11 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong 
*nip, ulong *ksp)
 {
struct ppc64_pt_regs *pt_regs;
unsigned long unip;
-   int in_user_space = FALSE;
+   /*
+* TRUE: task is running in a different context (userspace, OPAL..)
+* FALSE: task is probably running in kernel space.
+*/
+   int out_of_context = FALSE;
 
pt_regs = (struct ppc64_pt_regs *)bt_in->machdep;
if (!pt_regs || !pt_regs->gpr[1]) {
@@ -2283,20 +2305,25 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong 
*nip, ulong *ksp)
bt_in->task);
return FALSE;
}
+
*ksp = pt_regs->gpr[1];
if (IS_KVADDR(*ksp)) {
readmem(*ksp+16, KVADDR, &unip, sizeof(ulong), "Regs NIP value",
FAULT_ON_ERROR);
*nip = unip;
} else {
+   *nip = pt_regs->nip;
if (IN_TASK_VMA(bt_in->task, *ksp)) {
fprintf(fp, "%0lx: Task is running in user space\n",
bt_in->task);
-   in_user_space = TRUE;
+   out_of_context = TRUE;
+   } else if (is_opal_context(*ksp, *nip)) {
+   fprintf(fp, "%0lx: Task is running in OPAL (firmware) 
context\n",
+   bt_in->task);
+   out_of_context = TRUE;
} else
fprintf(fp, "%0lx: Invalid Stack Pointer %0lx\n",
bt_in->task, *ksp);
-   *nip = pt_regs->nip;
}
 
if (bt_in->flags &&
@@ -2307,7 +2334,8 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong 
*nip, ulong *ksp)
 * Print the collected regs for the active task
 */
ppc64_print_regs(pt_regs);
-   if (in_user_space)
+
+   if (out_of_context)
return TRUE;
if (!IS_KVADDR(*ksp))
return FALSE;

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility


Re: [Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=<value>".

2018-10-01 Thread Dave Anderson



- Original Message -
> Implemented support for 16k stack size that was introduced by commit
> 6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel
> stack to 16K".
> Without the patch, kernels that have a 16k stack cause errors in commands
> such as "bt" and any other command that assumes an 8K stack.
> Add a new "--machdep stacksize=<value>" option that can be used to
> override the default machdep->stacksize value, which is 8k.

The x86_64 default value of 8K is basically a leftover value that each of 
the architectures originally used for setting machdep->stacksize.  But for 
quite some time now, those values should get overridden later on here
in task_init():

STRUCT_SIZE_INIT(task_union, "task_union");
STRUCT_SIZE_INIT(thread_union, "thread_union");

if (VALID_SIZE(task_union) && (SIZE(task_union) != STACKSIZE())) {
error(WARNING, "\nnon-standard stack size: %ld\n",
len = SIZE(task_union));
machdep->stacksize = len;
} else if (VALID_SIZE(thread_union) &&
((len = SIZE(thread_union)) != STACKSIZE())) {
machdep->stacksize = len;
} else if (!VALID_SIZE(thread_union) && !VALID_SIZE(task_union)) {
if (kernel_symbol_exists("__start_init_task") &&
kernel_symbol_exists("__end_init_task")) {
len = symbol_value("__end_init_task");
len -= symbol_value("__start_init_task");
ASSIGN_SIZE(thread_union) = len;
machdep->stacksize = len;
}
}

As of Linux 4.18 at least, x86_64 still uses the thread_union declaration.
For example:

  crash> thread_union
  union thread_union {
  struct task_struct task;
  unsigned long stack[2048];
  }
  SIZE: 16384
  crash>

On what kernel version are you seeing the obsolete 8k stacksize being used?
What does the command above show on your system?

Thanks,
  Dave



 
> 
> Signed-off-by: Sean Fu 
> ---
>  x86_64.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/x86_64.c b/x86_64.c
> index 7d01140..1798f05 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -5716,6 +5716,15 @@ parse_cmdline_args(void)
>   continue;
>   }
>   }
> + } else if (STRNEQ(arglist[i], "stacksize=")) {
> + p = arglist[i] + strlen("stacksize=");
> + if (strlen(p)) {
> + value = stol(p, RETURN_ON_ERROR|QUIET, 
> &errflag);
> + if (!errflag) {
> + machdep->stacksize = value;
> + continue;
> + }
> + }
>   }
>   
>   error(WARNING, "ignoring --machdep option: %s\n", 
> arglist[i]);
> --
> 2.6.2
> 
> 

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility


Re: [Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=<value>".

2018-10-01 Thread Dominique Martinet
Sean Fu wrote on Sat, Sep 29, 2018:
> Implemented support for 16k stack size that was introduced by commit
> 6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel
> stack to 16K".
> Without the patch, kernels that have a 16k stack cause errors in commands
> such as "bt" and any other command that assumes an 8K stack.
> Add a new "--machdep stacksize=<value>" option that can be used to
> override the default machdep->stacksize value, which is 8k.

Instead of making that an option it could be possible to autodetect this
by looking at __start_init_task / __end_init_task symbols, the
difference should be the proper size (the symbols have been around since
91ed140d6c1e168b11bbbddac4f6066f40a0c6b5 in 4.7 so that might not be old
enough for you though, as your commit dates 3.15 ; but there might be
other methods of getting stack size I haven't thought of, I only grepped
in a recent kernel)

-- 
Dominique Martinet

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility


[Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=<value>".

2018-10-01 Thread Sean Fu
Implemented support for 16k stack size that was introduced by commit
6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel
stack to 16K".
Without the patch, kernels that have a 16k stack cause errors in commands
such as "bt" and any other command that assumes an 8K stack.
Add a new "--machdep stacksize=<value>" option that can be used to
override the default machdep->stacksize value, which is 8k.

Signed-off-by: Sean Fu 
---
 x86_64.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/x86_64.c b/x86_64.c
index 7d01140..1798f05 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -5716,6 +5716,15 @@ parse_cmdline_args(void)
continue;
}
}
+   } else if (STRNEQ(arglist[i], "stacksize=")) {
+   p = arglist[i] + strlen("stacksize=");
+   if (strlen(p)) {
+   value = stol(p, RETURN_ON_ERROR|QUIET, 
&errflag);
+   if (!errflag) {
+   machdep->stacksize = value;
+   continue;
+   }
+   }
}

error(WARNING, "ignoring --machdep option: %s\n", 
arglist[i]);
-- 
2.6.2

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility