From: Hari Bathini <hbath...@linux.vnet.ibm.com> Firmware-assisted dump support is enabled for the POWERNV platform in P9 firmware. Make the corresponding updates in the kernel to enable fadump support on the POWERNV platform.
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com> --- arch/powerpc/Kconfig | 2 arch/powerpc/include/asm/opal-api.h | 4 arch/powerpc/include/asm/opal.h | 1 arch/powerpc/kernel/fadump.c | 397 ++++++++++++++++++++--- arch/powerpc/kernel/fadump_internal.h | 27 ++ arch/powerpc/platforms/powernv/Makefile | 1 arch/powerpc/platforms/powernv/opal-wrappers.S | 1 arch/powerpc/platforms/powernv/powernv_fadump.c | 337 ++++++++++++++++++++ arch/powerpc/platforms/powernv/powernv_fadump.h | 63 ++++ arch/powerpc/platforms/pseries/pseries_fadump.c | 8 10 files changed, 783 insertions(+), 58 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.c create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c32a181..d749f1f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -599,7 +599,7 @@ config CRASH_DUMP config FA_DUMP bool "Firmware-assisted dump" - depends on PPC64 && PPC_RTAS + depends on PPC64 && (PPC_RTAS || PPC_POWERNV) select CRASH_CORE select CRASH_DUMP help diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index d886a5b..75e8925 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -206,7 +206,8 @@ #define OPAL_NPU_TL_SET 161 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 -#define OPAL_LAST 165 +#define OPAL_CONFIGURE_FADUMP 167 +#define OPAL_LAST 167 /* Device tree flags */ @@ -1040,6 +1041,7 @@ enum OpalSysCooling { enum { OPAL_REBOOT_NORMAL = 0, OPAL_REBOOT_PLATFORM_ERROR = 1, + OPAL_REBOOT_MPIPL = 3, }; /* Argument to OPAL_PCI_TCE_KILL */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 03e1a92..4c1f483 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -43,6 +43,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn, uint64_t PE_handle); int64_t 
opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap, uint64_t rate_phys, uint32_t size); +int64_t opal_configure_fadump(uint64_t command, void *data, uint64_t data_size); int64_t opal_console_write(int64_t term_number, __be64 *length, const uint8_t *buffer); int64_t opal_console_read(int64_t term_number, __be64 *length, diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 88fafe1..a27e4af 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -53,8 +53,12 @@ static struct cma *fadump_cma; struct fadump_ops_t *fadump_ops; static DEFINE_MUTEX(fadump_mutex); -struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +struct fadump_memory_range crash_memory_ranges[INIT_CRASHMEM_RANGES]; int crash_mem_ranges; +struct fadump_memory_range reserved_ranges[INIT_MEMBLOCK_REGIONS]; +int reserved_ranges_cnt; +struct fadump_memory_range memory_ranges[2 * INIT_MEMBLOCK_REGIONS]; +int memory_ranges_cnt; #ifdef CONFIG_CMA /* @@ -120,6 +124,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, if (strcmp(uname, "rtas") == 0) return pseries_dt_scan_fadump(&fw_dump, node); + if (strcmp(uname, "ibm,dump") == 0) + return powernv_dt_scan_fadump(&fw_dump, node); + return 0; } @@ -156,6 +163,8 @@ int is_fadump_active(void) /* Print firmware assisted dump configurations for debugging purpose. */ static void fadump_show_config(void) { + int i; + pr_debug("Support for firmware-assisted dump (fadump): %s\n", (fw_dump.fadump_supported ? 
"present" : "no support")); @@ -170,6 +179,13 @@ static void fadump_show_config(void) pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); + pr_debug("Real memory region hole size : %lx\n", + fw_dump.boot_memory_hole_size); + pr_debug("Real memory regions count : %lx\n", fw_dump.rmr_regions_cnt); + for (i = 0; i < fw_dump.rmr_regions_cnt; i++) { + pr_debug("%d. RMR base = %lx, size = %lx\n", (i+1), + fw_dump.rmr_src_addr[i], fw_dump.rmr_src_size[i]); + } } /** @@ -243,6 +259,157 @@ static inline unsigned long fadump_calculate_reserve_size(void) return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); } +static void __init fadump_get_reserved_ranges(void) +{ + unsigned long i, j, dt_root; + unsigned long long base, size; + struct fadump_memory_range tmp_range; + const __be32 *prop; + int len, idx; + + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + + dt_root = of_get_flat_dt_root(); + + prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); + + if (!prop) + return; + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. 
+ */ + for (i = 0; i < len / (sizeof(*prop) * 4); i++) { + u64 base, size; + + base = of_read_number(prop + (i * 4) + 0, 2); + size = of_read_number(prop + (i * 4) + 2, 2); + if (!size) + continue; + + reserved_ranges[reserved_ranges_cnt].base = base; + reserved_ranges[reserved_ranges_cnt].size = size; + reserved_ranges_cnt++; + + } + + if (!reserved_ranges_cnt) + return; + + /* Sort the reserved ranges */ + for (i = 0; i < reserved_ranges_cnt; i++) { + idx = i; + for (j = i + 1; j < reserved_ranges_cnt; j++) { + if (reserved_ranges[idx].base > reserved_ranges[j].base) + idx = j; + } + if (idx != i) { + tmp_range = reserved_ranges[idx]; + reserved_ranges[idx] = reserved_ranges[i]; + reserved_ranges[i] = tmp_range; + } + } + + /* Merge adjacent reserved ranges */ + idx = 0; + for (i = 1; i < reserved_ranges_cnt; i++) { + base = reserved_ranges[i-1].base; + size = reserved_ranges[i-1].size; + if (reserved_ranges[i].base == (base + size)) + reserved_ranges[idx].size += reserved_ranges[i].size; + else { + idx++; + if (i == idx) + continue; + + reserved_ranges[idx] = reserved_ranges[i]; + } + } + reserved_ranges_cnt = idx + 1; +} + +static inline void fadump_add_memory_range(unsigned long long base, + unsigned long long size) +{ + if (!size) + return; + + pr_debug("memory_range[%d] [%#016llx-%#016llx), %#llx bytes\n", + memory_ranges_cnt, base, base + size, size); + memory_ranges[memory_ranges_cnt].base = base; + memory_ranges[memory_ranges_cnt].size = size; + memory_ranges_cnt++; +} + +static void fadump_setup_memory_ranges(void) +{ + unsigned long i, j; + unsigned long long base, end, size; + struct memblock_region *reg; + struct fadump_memory_range tmp_ranges[INIT_MEMBLOCK_REGIONS]; + int tmp_ranges_cnt; + + /* get memory ranges */ + tmp_ranges_cnt = 0; + for_each_memblock(memory, reg) { + tmp_ranges[tmp_ranges_cnt].base = (unsigned long long)reg->base; + tmp_ranges[tmp_ranges_cnt].size = (unsigned long long)reg->size; + tmp_ranges_cnt++; + } + + /* exclude reserved 
ranges */ + memory_ranges_cnt = 0; + for (i = 0; i < tmp_ranges_cnt; i++) { + unsigned long long mem_base, mem_end, rsrv_base, rsrv_end; + int add = 1; + + base = mem_base = tmp_ranges[i].base; + end = mem_end = base + tmp_ranges[i].size; + for (j = 0; j < reserved_ranges_cnt; j++) { + rsrv_base = reserved_ranges[j].base; + rsrv_end = rsrv_base + reserved_ranges[j].size; + + if (mem_base > rsrv_end) + continue; + + if ((j < (reserved_ranges_cnt - 1)) && + (reserved_ranges[j + 1].base < mem_end)) + mem_end = reserved_ranges[j + 1].base; + + if ((rsrv_base < mem_end) && (rsrv_end > mem_base)) { + if ((mem_base < rsrv_base) && + (mem_end > rsrv_end)) { + size = rsrv_base - mem_base; + fadump_add_memory_range(mem_base, size); + size = mem_end - rsrv_end; + fadump_add_memory_range(rsrv_end, size); + } else if (mem_base < rsrv_base) { + size = rsrv_base - mem_base; + fadump_add_memory_range(mem_base, size); + } else if (mem_end > rsrv_end) { + size = mem_end - rsrv_end; + fadump_add_memory_range(rsrv_end, size); + } + + add = 0; + } + + if (mem_end == end) + break; + + mem_base = mem_end; + mem_end = end; + add = 1; + } + + if (add) + fadump_add_memory_range(mem_base, mem_end - mem_base); + } +} + + /* * Calculate the total memory size required to be reserved for * firmware-assisted dump registration. 
@@ -264,24 +431,113 @@ static unsigned long get_fadump_area_size(void) return size; } -static void __init fadump_reserve_crash_area(unsigned long base, - unsigned long size) +/* Preserve everything above the base address */ +static void __init fadump_reserve_crash_area(unsigned long base) { - struct memblock_region *reg; - unsigned long mstart, mend, msize; + int i; + unsigned long mstart, msize; - for_each_memblock(memory, reg) { - mstart = max_t(unsigned long, base, reg->base); - mend = reg->base + reg->size; - mend = min(base + size, mend); - - if (mstart < mend) { - msize = mend - mstart; - memblock_reserve(mstart, msize); - pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n", - (msize >> 20), mstart); + for (i = 0; i < memory_ranges_cnt; i++) { + mstart = memory_ranges[i].base; + msize = memory_ranges[i].size; + if ((mstart + msize) <= base) + continue; + + if (mstart < base) { + msize -= (base - mstart); + mstart = base; } + pr_info("Reserving %luMB of memory at %#016lx for saving crash dump\n", + (msize >> 20), mstart); + memblock_reserve(mstart, msize); + } +} + +static int __init add_rmr_region(unsigned long rmr_start, + unsigned long rmr_size) +{ + int i = fw_dump.rmr_regions_cnt; + + if (i >= MAX_REAL_MEM_REGIONS) + return 0; + fw_dump.rmr_regions_cnt++; + pr_debug("Added real memory range[%d] [%#016lx-%#016lx)\n", + i, rmr_start, (rmr_start + rmr_size)); + fw_dump.rmr_src_addr[i] = rmr_start; + fw_dump.rmr_src_size[i] = rmr_size; + return 1; +} + +/* + * Platforms like PowerNV have an upper limit on the size. + * If 'rmr_size' is bigger than that limit, split this memory range + * into multiple entries. + */ +static int __init add_rmr_regions(unsigned long rmr_start, + unsigned long rmr_size) +{ + unsigned long rstart, rsize, max_size; + int ret = 1; + + rstart = rmr_start; + max_size = fw_dump.max_copy_size ? 
fw_dump.max_copy_size : rmr_size; + while (rmr_size) { + if (rmr_size > max_size) + rsize = max_size; + else + rsize = rmr_size; + + ret = add_rmr_region(rstart, rsize); + if (!ret) + break; + + rmr_size -= rsize; + rstart += rsize; } + + return ret; +} + +static int __init fadump_get_rmr_regions(void) +{ + int i, ret = 1; + unsigned long base, size, last_end; + unsigned long mem_size = fw_dump.boot_memory_size; + + fw_dump.rmr_regions_cnt = 0; + fw_dump.boot_memory_hole_size = 0; + + /* + * TODO: Extend support for multiple real memory regions on + * pseries platform too. + */ + if (fw_dump.fadump_platform == FADUMP_PLATFORM_PSERIES) { + ret = add_rmr_regions(RMA_START, fw_dump.boot_memory_size); + return ret; + } + + last_end = memory_ranges[0].base; + for (i = 0; i < memory_ranges_cnt; i++) { + base = memory_ranges[i].base; + size = memory_ranges[i].size; + + if (base > last_end) + fw_dump.boot_memory_hole_size += (base - last_end); + + if (size >= mem_size) { + ret = add_rmr_regions(base, mem_size); + break; + } + + mem_size -= size; + ret = add_rmr_regions(base, size); + if (!ret) + break; + + last_end = base + size; + } + + return ret; } int __init fadump_reserve_mem(void) @@ -297,6 +553,10 @@ int __init fadump_reserve_mem(void) fw_dump.fadump_enabled = 0; return 0; } + + fadump_get_reserved_ranges(); + fadump_setup_memory_ranges(); + /* * Initialize boot memory size * If dump is active then we have already calculated the size during @@ -311,6 +571,11 @@ int __init fadump_reserve_mem(void) FADUMP_CMA_ALIGNMENT); #endif fw_dump.rmr_source_len = fw_dump.boot_memory_size; + if (!fadump_get_rmr_regions()) { + fw_dump.fadump_enabled = 0; + pr_err("Too many holes in boot memory area to enable fadump\n"); + return 0; + } } size = get_fadump_area_size(); @@ -335,6 +600,7 @@ int __init fadump_reserve_mem(void) else memory_boundary = memblock_end_of_DRAM(); + base = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size; if (fw_dump.dump_active) { #ifdef 
CONFIG_HUGETLB_PAGE /* @@ -348,11 +614,9 @@ int __init fadump_reserve_mem(void) * If last boot has crashed then reserve all the memory * above boot_memory_size so that we don't touch it until * dump is written to disk by userspace tool. This memory - * will be released for general use once the dump is saved. + * can be released for general use by invalidating fadump. */ - base = fw_dump.boot_memory_size; - size = memory_boundary - base; - fadump_reserve_crash_area(base, size); + fadump_reserve_crash_area(base); fw_dump.fadumphdr_addr = fadump_ops->get_meta_area_start(&fw_dump); @@ -366,23 +630,22 @@ int __init fadump_reserve_mem(void) * use memblock_find_in_range() here since it doesn't allocate * from bottom to top. */ - for (base = fw_dump.boot_memory_size; - base <= (memory_boundary - size); - base += size) { + while (base <= (memory_boundary - size)) { if (memblock_is_region_memory(base, size) && !memblock_is_region_reserved(base, size)) break; + + base += size; } + if ((base > (memory_boundary - size)) || memblock_reserve(base, size)) { pr_err("Failed to reserve memory\n"); return 0; } - pr_info("Reserved %ldMB of memory at %ldMB for firmware-" - "assisted dump (System RAM: %ldMB)\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20), + pr_info("Reserved %ldMB of memory at %#016lx (System RAM: %ldMB)\n", + (unsigned long)(size >> 20), base, (unsigned long)(memblock_phys_mem_size() >> 20)); fw_dump.reserve_dump_area_start = base; @@ -543,23 +806,28 @@ static int fadump_init_elfcore_header(char *bufp) */ static void fadump_setup_crash_memory_ranges(void) { - struct memblock_region *reg; - unsigned long long start, end; + unsigned long long start, end, offset; + int i; pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; + offset = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size; + /* - * add the first memory chunk (RMA_START through boot_memory_size) as - * a separate memory chunk. 
The reason is, at the time crash firmware - * will move the content of this memory chunk to different location - * specified during fadump registration. We need to create a separate - * program header for this chunk with the correct offset. + * Add real memory region(s) whose content is going to be moved to + * a different location, specified during fadump registration, by + * firmware at the time of crash. We need to create separate program + * header(s) for this memory chunk with the correct offset. */ - fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + for (i = 0; i < fw_dump.rmr_regions_cnt; i++) { + start = fw_dump.rmr_src_addr[i]; + end = start + fw_dump.rmr_src_size[i]; + fadump_add_crash_memory(start, end); + } - for_each_memblock(memory, reg) { - start = (unsigned long long)reg->base; - end = start + (unsigned long long)reg->size; + for (i = 0; i < memory_ranges_cnt; i++) { + start = memory_ranges[i].base; + end = start + memory_ranges[i].size; /* * skip the first memory chunk that is already added (RMA_START @@ -567,9 +835,9 @@ static void fadump_setup_crash_memory_ranges(void) * when RMA_START changes to a non-zero value. 
*/ BUILD_BUG_ON(RMA_START != 0); - if (start < fw_dump.boot_memory_size) { - if (end > fw_dump.boot_memory_size) - start = fw_dump.boot_memory_size; + if (start < offset) { + if (end > offset) + start = offset; else continue; } @@ -586,17 +854,32 @@ static void fadump_setup_crash_memory_ranges(void) */ static inline unsigned long fadump_relocate(unsigned long paddr) { - if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) - return fw_dump.rmr_destination_addr + paddr; - else - return paddr; + unsigned long raddr, rstart, rend, offset; + int i; + + offset = 0; + raddr = paddr; + for (i = 0; i < fw_dump.rmr_regions_cnt; i++) { + rstart = fw_dump.rmr_src_addr[i]; + rend = rstart + fw_dump.rmr_src_size[i]; + + if (paddr >= rstart && paddr < rend) { + raddr += fw_dump.rmr_destination_addr + offset - rstart; + break; + } + + offset += fw_dump.rmr_src_size[i]; + } + + return raddr; } static int fadump_create_elfcore_headers(char *bufp) { struct elfhdr *elf; struct elf_phdr *phdr; - int i; + unsigned long long raddr, offset; + int i, j; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; @@ -639,9 +922,12 @@ static int fadump_create_elfcore_headers(char *bufp) (elf->e_phnum)++; /* setup PT_LOAD sections. */ - + j = 0; + offset = 0; + raddr = fw_dump.rmr_src_addr[0]; for (i = 0; i < crash_mem_ranges; i++) { unsigned long long mbase, msize; + mbase = crash_memory_ranges[i].base; msize = crash_memory_ranges[i].size; @@ -654,13 +940,17 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_offset = mbase; - if (mbase == RMA_START) { + if (mbase == raddr) { /* * The entire RMA region will be moved by firmware * to the specified destination_address. Hence set * the correct offset. 
*/ - phdr->p_offset = fw_dump.rmr_destination_addr; + phdr->p_offset = fw_dump.rmr_destination_addr + offset; + if (j < (fw_dump.rmr_regions_cnt - 1)) { + offset += fw_dump.rmr_src_size[j]; + raddr = fw_dump.rmr_src_addr[++j]; + } } phdr->p_paddr = mbase; @@ -707,6 +997,7 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; + fadump_setup_memory_ranges(); fadump_setup_crash_memory_ranges(); addr = fadump_ops->get_meta_area_start(&fw_dump); @@ -822,14 +1113,14 @@ static void fadump_invalidate_release_mem(void) * later for releasing the memory for general use. */ reserved_area_start = fw_dump.reserve_dump_area_start; - reserved_area_end = reserved_area_start + - fw_dump.reserve_dump_area_size; + reserved_area_end = + memory_limit ? memory_limit : memblock_end_of_DRAM(); + /* - * Setup reserve_dump_area_start and its size so that we can - * reuse this reserved memory for Re-registration. + * Setup reserve_dump_area_start so that we can reuse this + * reserved memory for Re-registration. */ fw_dump.reserve_dump_area_start = destination_address; - fw_dump.reserve_dump_area_size = get_fadump_area_size(); fadump_release_memory(reserved_area_start, reserved_area_end); if (fw_dump.cpu_notes_buf) { diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h index 3791da7..eae4b55 100644 --- a/arch/powerpc/kernel/fadump_internal.h +++ b/arch/powerpc/kernel/fadump_internal.h @@ -49,6 +49,7 @@ /* Firmware-Assited Dump platforms */ #define FADUMP_PLATFORM_PSERIES 1 +#define FADUMP_PLATFORM_POWERNV 2 #define FADUMP_CPU_ID_MASK ((1UL << 32) - 1) @@ -92,11 +93,14 @@ struct fadump_crash_info_header { /* Crash memory ranges */ #define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) -struct fad_crash_memory_ranges { +struct fadump_memory_range { unsigned long long base; unsigned long long size; }; +/* Maximum no. 
of real memory regions supported by the kernel */ +#define MAX_REAL_MEM_REGIONS 6 + /* Firmware-assisted dump configuration details. */ struct fw_dump { unsigned long cpu_state_data_size; @@ -114,6 +118,17 @@ struct fw_dump { unsigned long rmr_source_len; unsigned long rmr_destination_addr; + unsigned long boot_memory_hole_size; + unsigned long rmr_regions_cnt; + unsigned long rmr_src_addr[MAX_REAL_MEM_REGIONS]; + unsigned long rmr_src_size[MAX_REAL_MEM_REGIONS]; + + /* + * Maximum size supported by firmware to copy from source to + * destination address per entry. + */ + unsigned long max_copy_size; + int ibm_configure_kernel_dump; unsigned long fadump_enabled:1; @@ -157,4 +172,14 @@ pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node) } #endif +#ifdef CONFIG_PPC_POWERNV +extern int powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node); +#else +static inline int +powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node) +{ + return 1; +} +#endif + #endif /* __PPC64_FA_DUMP_INTERNAL_H__ */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 703a350..0d106b5 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,6 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o +obj-$(CONFIG_FA_DUMP) += powernv_fadump.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3da30c2..20bbb9c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); 
OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_configure_fadump, OPAL_CONFIGURE_FADUMP); diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c new file mode 100644 index 0000000..6d4b515 --- /dev/null +++ b/arch/powerpc/platforms/powernv/powernv_fadump.c @@ -0,0 +1,337 @@ +/* + * Firmware-Assisted Dump support on POWERNV platform. + * + * Copyright 2018, IBM Corporation + * Author: Hari Bathini <hbath...@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG +#define pr_fmt(fmt) "powernv fadump: " fmt + +#include <linux/string.h> +#include <linux/memblock.h> +#include <linux/bug.h> +#include <linux/seq_file.h> +#include <linux/crash_dump.h> + +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/opal.h> +#include <asm/fadump.h> + +#include "../../kernel/fadump_internal.h" +#include "powernv_fadump.h" + +static struct powernv_fadump_mem_struct fdm; +static const struct powernv_fadump_mem_struct *fdm_active; +unsigned long fdm_actual_size; + +static void update_fadump_config(struct fw_dump *fadump_conf, + const struct powernv_fadump_mem_struct *fdm) +{ + unsigned long base, size, last_end; + int section_cnt = be16_to_cpu(fdm->section_count); + int unused_sections = (POWERNV_MAX_SECTIONS - section_cnt); + int i, j; + + pr_debug("section_cnt: %d\n", section_cnt); + WARN_ON(unused_sections < 0); + fdm_actual_size = sizeof(*fdm) - + (unused_sections * sizeof(struct powernv_fadump_section)); + + /* + * The first real memory region entry is the real memory + * regions destination address. 
+ */ + fadump_conf->rmr_destination_addr = 0; + for (i = 0; i < section_cnt; i++) { + if (fdm->section[i].src_type == + POWERNV_FADUMP_REAL_MODE_REGION) { + fadump_conf->rmr_destination_addr = + be64_to_cpu(fdm->section[i].dest_addr); + break; + } + } + pr_debug("Destination address of real memory regions: %#016lx\n", + fadump_conf->rmr_destination_addr); + + if (fadump_conf->dump_active) { + j = 0; + last_end = 0; + fadump_conf->rmr_source_len = 0; + fadump_conf->boot_memory_hole_size = 0; + for (i = 0; i < section_cnt; i++) { + if (fdm->section[i].src_type == + POWERNV_FADUMP_REAL_MODE_REGION) { + base = be64_to_cpu(fdm->section[i].src_addr); + size = be64_to_cpu(fdm->section[i].src_size); + pr_debug("%d. RMR base: 0x%lx, size: 0x%lx\n", + (i + 1), base, size); + + fadump_conf->rmr_src_addr[j] = base; + fadump_conf->rmr_src_size[j] = size; + fadump_conf->rmr_source_len += size; + + if (base > last_end) { + fadump_conf->boot_memory_hole_size += + (base - last_end); + } + + last_end = base + size; + j++; + } + } + fadump_conf->rmr_regions_cnt = j; + pr_debug("Real memory regions count: %lu\n", + fadump_conf->rmr_regions_cnt); + } +} + +static ulong powernv_init_fadump_mem_struct(struct fw_dump *fadump_conf, + ulong addr) +{ + int i, section_cnt = 0; + + fdm.section_size = cpu_to_be16(sizeof(struct powernv_fadump_section)); + + /* RMA region sections */ + for (i = 0; i < fadump_conf->rmr_regions_cnt; i++) { + fdm.section[RMR_REGION_INPUT_IDX + i].src_type = + POWERNV_FADUMP_REAL_MODE_REGION; + fdm.section[RMR_REGION_INPUT_IDX + i].src_addr = + cpu_to_be64(fadump_conf->rmr_src_addr[i]); + fdm.section[RMR_REGION_INPUT_IDX + i].dest_addr = + cpu_to_be64(addr); + fdm.section[RMR_REGION_INPUT_IDX + i].src_size = + fdm.section[RMR_REGION_INPUT_IDX + i].dest_size = + cpu_to_be64(fadump_conf->rmr_src_size[i]); + + section_cnt++; + addr += fadump_conf->rmr_src_size[i]; + } + + fdm.section_count = cpu_to_be16(section_cnt); + update_fadump_config(fadump_conf, &fdm); + + return 
addr; +} + +static int powernv_register_fadump(struct fw_dump *fadump_conf) +{ + int rc, err = -EIO; + + rc = opal_configure_fadump(FADUMP_REGISTER, &fdm, fdm_actual_size); + switch (rc) { + default: + pr_err("Failed to register. Unknown Error(%d).\n", rc); + break; + case OPAL_UNSUPPORTED: + pr_err("Support not available.\n"); + fadump_conf->fadump_supported = 0; + fadump_conf->fadump_enabled = 0; + break; + case OPAL_INTERNAL_ERROR: + pr_err("Failed to register. Hardware Error(%d).\n", rc); + break; + case OPAL_PARAMETER: + pr_err("Failed to register. Parameter Error(%d).\n", rc); + break; + case OPAL_PERMISSION: + pr_err("Already registered!\n"); + fadump_conf->dump_registered = 1; + err = -EEXIST; + break; + case OPAL_SUCCESS: + pr_info("Registration is successful!\n"); + fadump_conf->dump_registered = 1; + err = 0; + break; + } + + return err; +} + +static int powernv_unregister_fadump(struct fw_dump *fadump_conf) +{ + int rc; + + rc = opal_configure_fadump(FADUMP_UNREGISTER, &fdm, fdm_actual_size); + if (rc) { + pr_err("Failed to un-register - unexpected Error(%d).\n", rc); + return -EIO; + } + + fadump_conf->dump_registered = 0; + return 0; +} + +static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf) +{ + return fadump_conf->rmr_destination_addr; +} + +static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf) +{ + return (fadump_conf->rmr_destination_addr + + fadump_conf->rmr_source_len); +} + +static int powernv_invalidate_fadump(struct fw_dump *fadump_conf) +{ + int rc; + + rc = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active, + fdm_actual_size); + if (rc) { + pr_err("Failed to invalidate - unexpected Error(%d).\n", rc); + return -EIO; + } + + fadump_conf->dump_active = 0; + fdm_active = NULL; + return 0; +} + +/* + * Read CPU state dump data and convert it into ELF notes. + * The CPU dump starts with magic number "REGSAVE". 
NumCpusOffset should be + * used to access the data to allow for additional fields to be added without + * affecting compatibility. Each list of registers for a CPU starts with + * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, + * 8 Byte ASCII identifier and 8 Byte register value. The register entry + * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part + * of register value. For more details refer to PAPR document. + * + * Only for the crashing cpu we ignore the CPU dump data and get exact + * state from fadump crash info structure populated by first kernel at the + * time of crash. + */ +static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf) +{ + u32 num_cpus = 1, *note_buf; + struct fadump_crash_info_header *fdh = NULL; + + /* Allocate buffer to hold cpu crash notes. */ + fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); + fadump_conf->cpu_notes_buf_size = + PAGE_ALIGN(fadump_conf->cpu_notes_buf_size); + note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size); + if (!note_buf) { + pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n", + fadump_conf->cpu_notes_buf_size); + return -ENOMEM; + } + fadump_conf->cpu_notes_buf = __pa(note_buf); + + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", + (num_cpus * sizeof(note_buf_t)), note_buf); + + if (fadump_conf->fadumphdr_addr) + fdh = __va(fadump_conf->fadumphdr_addr); + + if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) { + note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs)); + final_note(note_buf); + + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header(fadump_conf, + __va(fdh->elfcorehdr_addr)); + } + + return 0; +} + +static int __init powernv_process_fadump(struct fw_dump *fadump_conf) +{ + struct fadump_crash_info_header *fdh; + int rc = 0; + + if (!fdm_active || !fadump_conf->fadumphdr_addr) + return -EINVAL; + + /* Validate the 
fadump crash info header */ + fdh = __va(fadump_conf->fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + pr_err("Crash info header is not valid.\n"); + return -EINVAL; + } + + /* + * TODO: To build cpu notes, find a way to map PIR to logical id. + * Also, we may need different method for pseries and powernv. + * The currently booted kernel could have a different PIR to + * logical id mapping. So, try saving info of previous kernel's + * paca to get the right PIR to logical id mapping. + */ + rc = fadump_build_cpu_notes(fadump_conf); + if (rc) + return rc; + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. + */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return rc; +} + +static void powernv_fadump_region_show(struct fw_dump *fadump_conf, + struct seq_file *m) +{ +} + +static void powernv_crash_fadump(const char *msg) +{ + int rc; + + rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg); + if (rc == OPAL_UNSUPPORTED) + pr_emerg("Reboot type %d not supported\n", OPAL_REBOOT_MPIPL); + else if (rc == OPAL_HARDWARE) + pr_emerg("No backend support for MPIPL!\n"); +} + +static struct fadump_ops_t powernv_fadump_ops = { + .init_fadump_mem_struct = powernv_init_fadump_mem_struct, + .register_fadump = powernv_register_fadump, + .unregister_fadump = powernv_unregister_fadump, + .get_preserv_area_start = powernv_get_preserv_area_start, + .get_meta_area_start = powernv_get_meta_area_start, + .invalidate_fadump = powernv_invalidate_fadump, + .process_fadump = powernv_process_fadump, + .fadump_region_show = powernv_fadump_region_show, + .crash_fadump = powernv_crash_fadump, +}; + +int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node) +{ + /* + * Firmware currently supports only 32-bit value for size, + * align it to 1MB size. 
+ */ + fadump_conf->max_copy_size = _ALIGN_DOWN(0xFFFFFFFF, (1 << 20)); + + /* + * Check if dump has been initiated on last reboot. + */ + fdm_active = of_get_flat_dt_prop(node, "result-table", NULL); + if (fdm_active) { + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; + update_fadump_config(fadump_conf, (void *)__pa(fdm_active)); + } + + fadump_ops = &powernv_fadump_ops; + fadump_conf->fadump_supported = 1; + fadump_conf->fadump_platform = FADUMP_PLATFORM_POWERNV; + + return 1; +} diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h new file mode 100644 index 0000000..224a142 --- /dev/null +++ b/arch/powerpc/platforms/powernv/powernv_fadump.h @@ -0,0 +1,63 @@ +/* + * Firmware-Assisted Dump support on POWERNV platform. + * + * Copyright 2018, IBM Corporation + * Author: Hari Bathini <hbath...@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __PPC64_POWERNV_FA_DUMP_H__ +#define __PPC64_POWERNV_FA_DUMP_H__ + +#define POWERNV_FADUMP_CPU_STATE_DATA 0x0000 +/* OPAL : 0x01 – 0x39 */ +#define POWERNV_FADUMP_OPAL_REGION 0x0001 +/* Firmware/SMF : 0x40 – 0x79 */ +#define POWERNV_FADUMP_FW_REGION 0x0040 +/* Kernel memory region : 0x80 – 0xb9 */ +#define POWERNV_FADUMP_REAL_MODE_REGION 0x0080 +/* Reserved for future use : 0xc0 – 0xff */ +#define POWERNV_FADUMP_RESERVED_REGION 0x00c0 + +enum powernv_fadump_section_types { + CPU_STATE_TYPE = 0, + OPAL_REGION_TYPE, + FW_REGION_TYPE, + RMR_REGION_TYPE, + POWERNV_SECTIONS +}; + +/* Starting index of RMR region in dump sections while registering */ +#define RMR_REGION_INPUT_IDX 0 + +#define POWERNV_MAX_SECTIONS (POWERNV_SECTIONS + \ + MAX_REAL_MEM_REGIONS - 1) + +/* Kernel Dump section info */ +struct powernv_fadump_section { + u8 src_type; + u8 reserved[7]; + __be64 src_addr; + __be64 src_size; + __be64 dest_addr; + __be64 dest_size; +}; + +/* + * Firmware Assisted dump memory structure. This structure is required for + * registering future kernel dump with power firmware through opal call. 
+ */ +struct powernv_fadump_mem_struct { + + __be16 section_size; /*sizeof(struct fadump_section) */ + __be16 section_count; /* number of sections */ + __be32 reserved; + + struct powernv_fadump_section section[POWERNV_MAX_SECTIONS]; +}; + +#endif /* __PPC64_POWERNV_FA_DUMP_H__ */ diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c index ac54501..ef7e59a 100644 --- a/arch/powerpc/platforms/pseries/pseries_fadump.c +++ b/arch/powerpc/platforms/pseries/pseries_fadump.c @@ -40,8 +40,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf, be64_to_cpu(fdm->rmr_region.destination_address); if (fadump_conf->dump_active) { - fadump_conf->rmr_source_len = - be64_to_cpu(fdm->rmr_region.source_len); + fadump_conf->rmr_src_addr[0] = + be64_to_cpu(fdm->rmr_region.source_address); + fadump_conf->rmr_src_size[0] = be64_to_cpu(fdm->rmr_region.source_len); + fadump_conf->rmr_regions_cnt = 1; + fadump_conf->rmr_source_len = fadump_conf->rmr_src_size[0]; + fadump_conf->boot_memory_hole_size = 0; } }