[Crash-utility] [PATCH] kmem: update n option to dump memory block
From: Masayoshi Mizuma Update for the "kmem -n" option to also dump memory block. Currently, "kmem -n" shows the memory section only. This patch makes the memory block information available as well if the 'memory_block' structure and the 'memory_subsys' symbol exist. The memory block information is useful for investigating memory hot-plug issues. Signed-off-by: Masayoshi Mizuma --- defs.h | 8 ++ memory.c | 412 +-- 2 files changed, 379 insertions(+), 41 deletions(-) diff --git a/defs.h b/defs.h index 5b64bb7..f707c64 100644 --- a/defs.h +++ b/defs.h @@ -2049,6 +2049,14 @@ struct offset_table {/* stash of commonly-used offsets */ long pci_bus_self; long device_kobj; long kobject_name; + long memory_block_dev; + long memory_block_start_section_nr; + long mem_section_pageblock_flags; + long memory_block_state; + long memory_block_nid; + long bus_type_p; + long device_private_device; + long device_private_knode_bus; }; struct size_table { /* stash of commonly-used sizes */ diff --git a/memory.c b/memory.c index ea25047..c7a4787 100644 --- a/memory.c +++ b/memory.c @@ -254,14 +254,16 @@ static void PG_reserved_flag_init(void); static void PG_slab_flag_init(void); static ulong nr_blockdev_pages(void); void sparse_mem_init(void); -void dump_mem_sections(int); +void dump_mem_block_and_sections(int); +void _dump_mem_block_and_sections(void); +void dump_mem_sections(void); void list_mem_sections(void); ulong sparse_decode_mem_map(ulong, ulong); char *read_mem_section(ulong); ulong nr_to_section(ulong); int valid_section(ulong); int section_has_mem_map(ulong); -ulong section_mem_map_addr(ulong); +ulong section_mem_map_addr(ulong, int); ulong valid_section_nr(ulong); ulong pfn_to_map(ulong); static int get_nodes_online(void); @@ -5528,7 +5530,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) pc->curcmd_flags |= HEADER_PRINTED; } - pp = section_mem_map_addr(section); + pp = section_mem_map_addr(section, 0); pp = sparse_decode_mem_map(pp, section_nr); phys = (physaddr_t) section_nr * PAGES_PER_SECTION() * 
PAGESIZE(); section_size = PAGES_PER_SECTION(); @@ -13389,7 +13391,7 @@ is_page_ptr(ulong addr, physaddr_t *phys) nr_mem_sections = vt->max_mem_section_nr+1; for (nr = 0; nr < nr_mem_sections ; nr++) { if ((sec_addr = valid_section_nr(nr))) { - coded_mem_map = section_mem_map_addr(sec_addr); + coded_mem_map = section_mem_map_addr(sec_addr, 0); mem_map = sparse_decode_mem_map(coded_mem_map, nr); end_mem_map = mem_map + (PAGES_PER_SECTION() * SIZE(page)); @@ -16355,7 +16357,7 @@ dump_memory_nodes(int initialize) } if (IS_SPARSEMEM()) - dump_mem_sections(initialize); + dump_mem_block_and_sections(initialize); } /* @@ -17140,7 +17142,7 @@ section_has_mem_map(ulong addr) } ulong -section_mem_map_addr(ulong addr) +section_mem_map_addr(ulong addr, int raw) { char *mem_section; ulong map; @@ -17148,7 +17150,8 @@ section_mem_map_addr(ulong addr) if ((mem_section = read_mem_section(addr))) { map = ULONG(mem_section + OFFSET(mem_section_section_mem_map)); - map &= SECTION_MAP_MASK; + if (!raw) + map &= SECTION_MAP_MASK; return map; } return 0; @@ -17179,7 +17182,7 @@ pfn_to_map(ulong pfn) if (section_has_mem_map(section)) { page_offset = pfn - section_nr_to_pfn(section_nr); - coded_mem_map = section_mem_map_addr(section); + coded_mem_map = section_mem_map_addr(section, 0); mem_map = sparse_decode_mem_map(coded_mem_map, section_nr) + (page_offset * SIZE(page)); return mem_map; @@ -17188,16 +17191,365 @@ pfn_to_map(ulong pfn) return 0; } -void -dump_mem_sections(int initialize) +struct memory_block_info { + ulong memory_block; + ulong start_sec; + ulong start_pfn; + ulong nid; + char state[24]; + char name[32]; +}; + +#define MIN_MEMORY_BLOCK_SIZE (1UL << _SECTION_SIZE_BITS) + +#define MEM_ONLINE (1<<0) +#define MEM_GOING_OFFLINE (1<<1) +#define MEM_OFFLINE (1<<2) +#define MEM_GOING_ONLINE (1<<3) +#define MEM_CANCEL_ONLINE (1<<4) +#define MEM_CANCEL_OFFLINE (1<<5) + +static void +fill_memory_block_state(ulong memblock, char *buf) { - ulong nr, max, addr; - ulong nr_mem_sections; 
+ ulong state; + + memset(buf, 0, sizeof(*buf) * BUFSIZE); + +
[Crash-utility] [PATCH 1/2] ppc64/opal: add a flag to determine if the kernel is running on OPAL firmware
Add a PPC64-specific flag for kernels running on platforms based on OPAL firmware. Use this flag before processing commands specific to OPAL based systems. Signed-off-by: Hari Bathini --- defs.h |8 ppc64.c | 36 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/defs.h b/defs.h index 5b64bb7..567992e 100644 --- a/defs.h +++ b/defs.h @@ -5963,6 +5963,12 @@ struct ppc64_elf_prstatus { #ifdef PPC64 +struct ppc64_opal { + uint64_t base; + uint64_t entry; + uint64_t size; +}; + struct ppc64_vmemmap { unsigned long phys; unsigned long virt; @@ -6013,6 +6019,7 @@ struct machine_specific { ulong _page_accessed; int (*is_kvaddr)(ulong); int (*is_vmaddr)(ulong); + struct ppc64_opal opal; }; void ppc64_init(int); @@ -6030,6 +6037,7 @@ void ppc64_dump_machdep_table(ulong); * in the kernel is also 0x40. */ #define RADIX_MMU (0x40) +#define OPAL_FW (0x80) #define REGION_SHIFT (60UL) #define REGION_ID(addr)(((unsigned long)(addr)) >> REGION_SHIFT) diff --git a/ppc64.c b/ppc64.c index ee2f76f..cf41765 100644 --- a/ppc64.c +++ b/ppc64.c @@ -241,6 +241,7 @@ struct machine_specific book3e_machine_specific = { .is_vmaddr = book3e_is_vmaddr, }; +#define SKIBOOT_BASE 0x3000 /* * Do all necessary machine-specific setup here. This is called several @@ -362,6 +363,16 @@ ppc64_init(int when) struct machine_specific *m = machdep->machspec; /* +* To determine if the kernel was running on OPAL based platform, +* use struct opal, which is populated with relevant values. +*/ + if (symbol_exists("opal")) { + get_symbol_data("opal", sizeof(struct ppc64_opal), &(m->opal)); + if (m->opal.base == SKIBOOT_BASE) + machdep->flags |= OPAL_FW; + } + + /* * On Power ISA 3.0 based server processors, a kernel can * run with radix MMU or standard MMU. Set the flag, * if it is radix MMU. @@ -712,6 +723,8 @@ ppc64_dump_machdep_table(ulong arg) fprintf(fp, "%sSWAP_ENTRY_L4", others++ ? "|" : ""); if (machdep->flags & RADIX_MMU) fprintf(fp, "%sRADIX_MMU", others++ ? 
"|" : ""); + if (machdep->flags & OPAL_FW) + fprintf(fp, "%sOPAL_FW", others++ ? "|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -2828,7 +2841,6 @@ ppc64_get_smp_cpus(void) */ #define SKIBOOT_CONSOLE_DUMP_START 0x3100 #define SKIBOOT_CONSOLE_DUMP_SIZE 0x10 -#define SKIBOOT_BASE 0x3000 #define ASCII_UNLIMITED ((ulong)(-1) >> 1) void @@ -2841,10 +2853,6 @@ opalmsg(void) uint64_t u64; uint64_t limit64; }; - struct opal { - unsigned long long base; - unsigned long long entry; - } opal; int i, a; size_t typesz; void *location; @@ -2856,25 +2864,13 @@ opalmsg(void) long count = SKIBOOT_CONSOLE_DUMP_SIZE; ulonglong addr = SKIBOOT_CONSOLE_DUMP_START; + if (!(machdep->flags & OPAL_FW)) + error(FATAL, "dump was not captured on OPAL based system"); + if (CRASHDEBUG(4)) fprintf(fp, "<addr: %llx count: %ld (%s)>\n", addr, count, "PHYSADDR"); - /* -* OPAL based platform check -* struct opal of BSS section and hence default value will be ZERO(0) -* opal_init() in the kernel initializes this structure based on -* the platform. Use it as a key to determine whether the dump -* was taken on an OPAL based system or not. -*/ - if (symbol_exists("opal")) { - get_symbol_data("opal", sizeof(struct opal), &opal); - if (opal.base != SKIBOOT_BASE) - error(FATAL, "dump was captured on non-PowerNV machine"); - } else { - error(FATAL, "dump was captured on non-PowerNV machine"); - } - BZERO(&mem, sizeof(struct memloc)); lost = typesz = per_line = 0; location = NULL; -- Crash-utility mailing list Crash-utility@redhat.com https://www.redhat.com/mailman/listinfo/crash-utility
[Crash-utility] [PATCH 2/2] ppc64/opal: Improve bt output when R1 falls in OPAL range
On OPAL based systems, when a thread is running an OPAL API, the stack pointer and instruction pointer would be pointing at OPAL address but 'bt' output for such thread would complain that the stack pointer is invalid. Update error/log message for better context. Signed-off-by: Hari Bathini --- ppc64.c | 36 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/ppc64.c b/ppc64.c index cf41765..041480b 100644 --- a/ppc64.c +++ b/ppc64.c @@ -65,8 +65,26 @@ static ulong hugepage_dir(ulong pte); static ulong pgd_page_vaddr_l4(ulong pgd); static ulong pud_page_vaddr_l4(ulong pud); static ulong pmd_page_vaddr_l4(ulong pmd); +static int is_opal_context(ulong sp, ulong nip); void opalmsg(void); +static int is_opal_context(ulong sp, ulong nip) +{ + uint64_t opal_start, opal_end; + + if (!(machdep->flags & OPAL_FW)) + return FALSE; + + opal_start = machdep->machspec->opal.base; + opal_end = opal_start + machdep->machspec->opal.size; + + if (((sp >= opal_start) && (sp < opal_end)) || + ((nip >= opal_start) && (nip < opal_end))) + return TRUE; + + return FALSE; +} + static inline int is_hugepage(ulong pte) { if ((machdep->flags & BOOK3E) || @@ -2270,7 +2288,11 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp) { struct ppc64_pt_regs *pt_regs; unsigned long unip; - int in_user_space = FALSE; + /* +* TRUE: task is running in a different context (userspace, OPAL..) +* FALSE: task is probably running in kernel space. 
+*/ + int out_of_context = FALSE; pt_regs = (struct ppc64_pt_regs *)bt_in->machdep; if (!pt_regs || !pt_regs->gpr[1]) { @@ -2283,20 +2305,25 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp) bt_in->task); return FALSE; } + *ksp = pt_regs->gpr[1]; if (IS_KVADDR(*ksp)) { readmem(*ksp+16, KVADDR, &unip, sizeof(ulong), "Regs NIP value", FAULT_ON_ERROR); *nip = unip; } else { + *nip = pt_regs->nip; if (IN_TASK_VMA(bt_in->task, *ksp)) { fprintf(fp, "%0lx: Task is running in user space\n", bt_in->task); - in_user_space = TRUE; + out_of_context = TRUE; + } else if (is_opal_context(*ksp, *nip)) { + fprintf(fp, "%0lx: Task is running in OPAL (firmware) context\n", + bt_in->task); + out_of_context = TRUE; } else fprintf(fp, "%0lx: Invalid Stack Pointer %0lx\n", bt_in->task, *ksp); - *nip = pt_regs->nip; } if (bt_in->flags && @@ -2307,7 +2334,8 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp) * Print the collected regs for the active task */ ppc64_print_regs(pt_regs); - if (in_user_space) + + if (out_of_context) return TRUE; if (!IS_KVADDR(*ksp)) return FALSE; -- Crash-utility mailing list Crash-utility@redhat.com https://www.redhat.com/mailman/listinfo/crash-utility
Re: [Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=".
- Original Message - > Implemented support for 16k stack size that was introduced by commit > 6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel > stack to 16K". > Without the patch, kernels has 16k stack, leading to errors in commands > such as "bt" and any command regarding 8K stack. > Add a new "--machdep stacksize=" option that can be used to > override the default machdep->stacksize value which is 8k. The x86_64 default value of 8K is basically a leftover value that each of the architectures originally used for setting machdep->stacksize. But for quite some time now, those values should get overridden later on here in task_init(): STRUCT_SIZE_INIT(task_union, "task_union"); STRUCT_SIZE_INIT(thread_union, "thread_union"); if (VALID_SIZE(task_union) && (SIZE(task_union) != STACKSIZE())) { error(WARNING, "\nnon-standard stack size: %ld\n", len = SIZE(task_union)); machdep->stacksize = len; } else if (VALID_SIZE(thread_union) && ((len = SIZE(thread_union)) != STACKSIZE())) { machdep->stacksize = len; } else if (!VALID_SIZE(thread_union) && !VALID_SIZE(task_union)) { if (kernel_symbol_exists("__start_init_task") && kernel_symbol_exists("__end_init_task")) { len = symbol_value("__end_init_task"); len -= symbol_value("__start_init_task"); ASSIGN_SIZE(thread_union) = len; machdep->stacksize = len; } } As of Linux 4.18 at least, x86_64 still uses the thread_union declaration. For example: crash> thread_union union thread_union { struct task_struct task; unsigned long stack[2048]; } SIZE: 16384 crash> On what kernel version are you seeing the obsolete 8k stacksize being used? What does the command above show on your system? 
Thanks, Dave > > Signed-off-by: Sean Fu > --- > x86_64.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/x86_64.c b/x86_64.c > index 7d01140..1798f05 100644 > --- a/x86_64.c > +++ b/x86_64.c > @@ -5716,6 +5716,15 @@ parse_cmdline_args(void) > continue; > } > } > + } else if (STRNEQ(arglist[i], "stacksize=")) { > + p = arglist[i] + strlen("stacksize="); > + if (strlen(p)) { > + value = stol(p, RETURN_ON_ERROR|QUIET, > &errflag); > + if (!errflag) { > + machdep->stacksize = value; > + continue; > + } > + } > } > > error(WARNING, "ignoring --machdep option: %s\n", > arglist[i]); > -- > 2.6.2 > > -- Crash-utility mailing list Crash-utility@redhat.com https://www.redhat.com/mailman/listinfo/crash-utility
Re: [Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=".
Sean Fu wrote on Sat, Sep 29, 2018: > Implemented support for 16k stack size that was introduced by commit > 6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel > stack to 16K". > Without the patch, kernels has 16k stack, leading to errors in commands > such as "bt" and any command regarding 8K stack. > Add a new "--machdep stacksize=" option that can be used to > override the default machdep->stacksize value which is 8k. Instead of making that an option it could be possible to autodetect this by looking at __start_init_task / __end_init_task symbols, the difference should be the proper size (the symbols have been around since 91ed140d6c1e168b11bbbddac4f6066f40a0c6b5 in 4.7 so that might not be old enough for you though, as your commit dates 3.15 ; but there might be other methods of getting stack size I haven't thought of, I only grepped in a recent kernel) -- Dominique Martinet -- Crash-utility mailing list Crash-utility@redhat.com https://www.redhat.com/mailman/listinfo/crash-utility
[Crash-utility] [PATCH] cmdline: Add a new "--machdep stacksize=".
Implemented support for 16k stack size that was introduced by commit 6538b8ea886e472f4431db8ca1d60478f838d14b titled "x86_64: expand kernel stack to 16K". Without the patch, kernels that have a 16k stack lead to errors in commands such as "bt" and any other command that assumes an 8K stack. Add a new "--machdep stacksize=" option that can be used to override the default machdep->stacksize value which is 8k. Signed-off-by: Sean Fu --- x86_64.c | 9 + 1 file changed, 9 insertions(+) diff --git a/x86_64.c b/x86_64.c index 7d01140..1798f05 100644 --- a/x86_64.c +++ b/x86_64.c @@ -5716,6 +5716,15 @@ parse_cmdline_args(void) continue; } } + } else if (STRNEQ(arglist[i], "stacksize=")) { + p = arglist[i] + strlen("stacksize="); + if (strlen(p)) { + value = stol(p, RETURN_ON_ERROR|QUIET, &errflag); + if (!errflag) { + machdep->stacksize = value; + continue; + } + } } error(WARNING, "ignoring --machdep option: %s\n", arglist[i]); -- 2.6.2 -- Crash-utility mailing list Crash-utility@redhat.com https://www.redhat.com/mailman/listinfo/crash-utility