From: Hari Bathini <hbath...@linux.vnet.ibm.com>

Firmware-assisted dump support is enabled for the POWERNV platform in P9
firmware. Make the corresponding updates in the kernel to enable fadump
support on the POWERNV platform.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                            |    2 
 arch/powerpc/include/asm/opal-api.h             |    4 
 arch/powerpc/include/asm/opal.h                 |    1 
 arch/powerpc/kernel/fadump.c                    |  397 ++++++++++++++++++++---
 arch/powerpc/kernel/fadump_internal.h           |   27 ++
 arch/powerpc/platforms/powernv/Makefile         |    1 
 arch/powerpc/platforms/powernv/opal-wrappers.S  |    1 
 arch/powerpc/platforms/powernv/powernv_fadump.c |  337 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/powernv_fadump.h |   63 ++++
 arch/powerpc/platforms/pseries/pseries_fadump.c |    8 
 10 files changed, 783 insertions(+), 58 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.c
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181..d749f1f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -599,7 +599,7 @@ config CRASH_DUMP
 
 config FA_DUMP
        bool "Firmware-assisted dump"
-       depends on PPC64 && PPC_RTAS
+       depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
        select CRASH_CORE
        select CRASH_DUMP
        help
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index d886a5b..75e8925 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -206,7 +206,8 @@
 #define OPAL_NPU_TL_SET                                161
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR           164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR           165
-#define OPAL_LAST                              165
+#define OPAL_CONFIGURE_FADUMP                  167
+#define OPAL_LAST                              167
 
 /* Device tree flags */
 
@@ -1040,6 +1041,7 @@ enum OpalSysCooling {
 enum {
        OPAL_REBOOT_NORMAL              = 0,
        OPAL_REBOOT_PLATFORM_ERROR      = 1,
+       OPAL_REBOOT_MPIPL               = 3,
 };
 
 /* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 03e1a92..4c1f483 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -43,6 +43,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t 
bdfn,
                                uint64_t PE_handle);
 int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
                        uint64_t rate_phys, uint32_t size);
+int64_t opal_configure_fadump(uint64_t command, void *data, uint64_t 
data_size);
 int64_t opal_console_write(int64_t term_number, __be64 *length,
                           const uint8_t *buffer);
 int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 88fafe1..a27e4af 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -53,8 +53,12 @@ static struct cma *fadump_cma;
 struct fadump_ops_t *fadump_ops;
 
 static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fadump_memory_range crash_memory_ranges[INIT_CRASHMEM_RANGES];
 int crash_mem_ranges;
+struct fadump_memory_range reserved_ranges[INIT_MEMBLOCK_REGIONS];
+int reserved_ranges_cnt;
+struct fadump_memory_range memory_ranges[2 * INIT_MEMBLOCK_REGIONS];
+int memory_ranges_cnt;
 
 #ifdef CONFIG_CMA
 /*
@@ -120,6 +124,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, 
const char *uname,
        if (strcmp(uname, "rtas") == 0)
                return pseries_dt_scan_fadump(&fw_dump, node);
 
+       if (strcmp(uname, "ibm,dump") == 0)
+               return powernv_dt_scan_fadump(&fw_dump, node);
+
        return 0;
 }
 
@@ -156,6 +163,8 @@ int is_fadump_active(void)
 /* Print firmware assisted dump configurations for debugging purpose. */
 static void fadump_show_config(void)
 {
+       int i;
+
        pr_debug("Support for firmware-assisted dump (fadump): %s\n",
                        (fw_dump.fadump_supported ? "present" : "no support"));
 
@@ -170,6 +179,13 @@ static void fadump_show_config(void)
        pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
        pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
        pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
+       pr_debug("Real memory region hole size  : %lx\n",
+                fw_dump.boot_memory_hole_size);
+       pr_debug("Real meory regions count : %lx\n", fw_dump.rmr_regions_cnt);
+       for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+               pr_debug("%d. RMR base = %lx, size = %lx\n", (i+1),
+                        fw_dump.rmr_src_addr[i], fw_dump.rmr_src_size[i]);
+       }
 }
 
 /**
@@ -243,6 +259,157 @@ static inline unsigned long 
fadump_calculate_reserve_size(void)
        return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
 }
 
+/*
+ * Collect the "reserved-ranges" property of the flattened device tree
+ * root node into reserved_ranges[], sorted by base address and with
+ * adjacent ranges merged. reserved_ranges_cnt is left holding the number
+ * of entries that remain after merging.
+ */
+static void __init fadump_get_reserved_ranges(void)
+{
+       unsigned long i, j, dt_root;
+       unsigned long long base, size;
+       struct fadump_memory_range tmp_range;
+       const __be32 *prop;
+       int len, idx;
+
+       early_init_fdt_reserve_self();
+       early_init_fdt_scan_reserved_mem();
+
+       dt_root = of_get_flat_dt_root();
+
+       prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+       if (!prop)
+               return;
+
+       /*
+        * Each reserved range is an (address,size) pair, 2 cells each,
+        * totalling 4 cells per range.
+        */
+       for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+               base = of_read_number(prop + (i * 4) + 0, 2);
+               size = of_read_number(prop + (i * 4) + 2, 2);
+               if (!size)
+                       continue;
+
+               /* reserved_ranges[] is fixed size; never write past it. */
+               if (reserved_ranges_cnt >= INIT_MEMBLOCK_REGIONS) {
+                       pr_err("Too many reserved ranges; ignoring the rest\n");
+                       break;
+               }
+
+               reserved_ranges[reserved_ranges_cnt].base = base;
+               reserved_ranges[reserved_ranges_cnt].size = size;
+               reserved_ranges_cnt++;
+       }
+
+       if (!reserved_ranges_cnt)
+               return;
+
+       /* Sort the reserved ranges (selection sort on base address) */
+       for (i = 0; i < reserved_ranges_cnt; i++) {
+               idx = i;
+               for (j = i + 1; j < reserved_ranges_cnt; j++) {
+                       if (reserved_ranges[idx].base > reserved_ranges[j].base)
+                               idx = j;
+               }
+               if (idx != i) {
+                       tmp_range = reserved_ranges[idx];
+                       reserved_ranges[idx] = reserved_ranges[i];
+                       reserved_ranges[i] = tmp_range;
+               }
+       }
+
+       /*
+        * Merge adjacent reserved ranges. 'idx' is the entry being grown;
+        * entry 'i' either extends it or is moved down to become the next
+        * merged entry.
+        */
+       idx = 0;
+       for (i = 1; i < reserved_ranges_cnt; i++) {
+               base = reserved_ranges[i-1].base;
+               size = reserved_ranges[i-1].size;
+               if (reserved_ranges[i].base == (base + size))
+                       reserved_ranges[idx].size += reserved_ranges[i].size;
+               else {
+                       idx++;
+                       /* Nothing merged so far: entry is already in place. */
+                       if (i == idx)
+                               continue;
+
+                       reserved_ranges[idx] = reserved_ranges[i];
+               }
+       }
+       reserved_ranges_cnt = idx + 1;
+}
+
+/*
+ * Append [base, base + size) to memory_ranges[]. Zero-sized ranges are
+ * ignored, and so is any range that no longer fits in the array.
+ */
+static inline void fadump_add_memory_range(unsigned long long base,
+                                          unsigned long long size)
+{
+       if (!size)
+               return;
+
+       /* memory_ranges[] holds 2 * INIT_MEMBLOCK_REGIONS entries. */
+       if (memory_ranges_cnt >= (2 * INIT_MEMBLOCK_REGIONS)) {
+               pr_err("Too many memory ranges; dropping [%#016llx-%#016llx)\n",
+                      base, base + size);
+               return;
+       }
+
+       pr_debug("memory_range[%d] [%#016llx-%#016llx), %#llx bytes\n",
+                memory_ranges_cnt, base, base + size, size);
+       memory_ranges[memory_ranges_cnt].base = base;
+       memory_ranges[memory_ranges_cnt].size = size;
+       memory_ranges_cnt++;
+}
+
+/*
+ * Rebuild memory_ranges[] from the memblock memory regions, leaving out
+ * whatever overlaps an entry of reserved_ranges[].
+ */
+static void fadump_setup_memory_ranges(void)
+{
+       unsigned long i, j;
+       unsigned long long base, end, size;
+       struct memblock_region *reg;
+       struct fadump_memory_range tmp_ranges[INIT_MEMBLOCK_REGIONS];
+       int tmp_ranges_cnt;
+
+       /* get memory ranges */
+       tmp_ranges_cnt = 0;
+       for_each_memblock(memory, reg) {
+               /* tmp_ranges[] lives on the stack; never write past it. */
+               if (tmp_ranges_cnt >= INIT_MEMBLOCK_REGIONS) {
+                       pr_err("Too many memory regions; ignoring the rest\n");
+                       break;
+               }
+
+               tmp_ranges[tmp_ranges_cnt].base = (unsigned long long)reg->base;
+               tmp_ranges[tmp_ranges_cnt].size = (unsigned long long)reg->size;
+               tmp_ranges_cnt++;
+       }
+
+       /* exclude reserved ranges */
+       memory_ranges_cnt = 0;
+       for (i = 0; i < tmp_ranges_cnt; i++) {
+               unsigned long long mem_base, mem_end, rsrv_base, rsrv_end;
+               int add = 1;
+
+               /*
+                * [base, end) is the whole memory region; [mem_base, mem_end)
+                * is the piece of it currently being examined.
+                */
+               base = mem_base = tmp_ranges[i].base;
+               end = mem_end = base + tmp_ranges[i].size;
+               for (j = 0; j < reserved_ranges_cnt; j++) {
+                       rsrv_base = reserved_ranges[j].base;
+                       rsrv_end  = rsrv_base + reserved_ranges[j].size;
+
+                       /* Reserved range lies below the current piece. */
+                       if (mem_base > rsrv_end)
+                               continue;
+
+                       /*
+                        * Clip the piece at the start of the next reserved
+                        * range so each pass deals with one reserved range.
+                        */
+                       if ((j < (reserved_ranges_cnt - 1)) &&
+                           (reserved_ranges[j + 1].base < mem_end))
+                               mem_end = reserved_ranges[j + 1].base;
+
+                       /* Overlap: add what sticks out on either side. */
+                       if ((rsrv_base < mem_end) && (rsrv_end > mem_base)) {
+                               if ((mem_base < rsrv_base) &&
+                                   (mem_end > rsrv_end)) {
+                                       size = rsrv_base - mem_base;
+                                       fadump_add_memory_range(mem_base, size);
+                                       size = mem_end - rsrv_end;
+                                       fadump_add_memory_range(rsrv_end, size);
+                               } else if (mem_base < rsrv_base) {
+                                       size = rsrv_base - mem_base;
+                                       fadump_add_memory_range(mem_base, size);
+                               } else if (mem_end > rsrv_end) {
+                                       size = mem_end - rsrv_end;
+                                       fadump_add_memory_range(rsrv_end, size);
+                               }
+
+                               add = 0;
+                       }
+
+                       /* The piece reached the end of the region: done. */
+                       if (mem_end == end)
+                               break;
+
+                       /* Move on to the remainder of the region. */
+                       mem_base = mem_end;
+                       mem_end = end;
+                       add = 1;
+               }
+
+               /* Piece not touched by any reserved range: add it whole. */
+               if (add)
+                       fadump_add_memory_range(mem_base, mem_end - mem_base);
+       }
+}
+
+
 /*
  * Calculate the total memory size required to be reserved for
  * firmware-assisted dump registration.
@@ -264,24 +431,113 @@ static unsigned long get_fadump_area_size(void)
        return size;
 }
 
-static void __init fadump_reserve_crash_area(unsigned long base,
-                                            unsigned long size)
+/*
+ * Preserve everything above the base address: reserve, in memblock, the
+ * part of every memory_ranges[] entry that lies at or above 'base'.
+ */
+static void __init fadump_reserve_crash_area(unsigned long base)
 {
-       struct memblock_region *reg;
-       unsigned long mstart, mend, msize;
+       int i;
+       unsigned long mstart, msize;
 
-       for_each_memblock(memory, reg) {
-               mstart = max_t(unsigned long, base, reg->base);
-               mend = reg->base + reg->size;
-               mend = min(base + size, mend);
-
-               if (mstart < mend) {
-                       msize = mend - mstart;
-                       memblock_reserve(mstart, msize);
-                       pr_info("Reserved %ldMB of memory at %#016lx for saving 
crash dump\n",
-                               (msize >> 20), mstart);
+       for (i = 0; i < memory_ranges_cnt; i++) {
+               mstart = memory_ranges[i].base;
+               msize = memory_ranges[i].size;
+
+               /* Nothing to preserve in a range ending at or below base. */
+               if ((mstart + msize) <= base)
+                       continue;
+
+               /* Range straddles base: keep only the part above it. */
+               if (mstart < base) {
+                       msize -= (base - mstart);
+                       mstart = base;
                 }
+               pr_info("Reserving %luMB of memory at %#016lx for saving crash dump\n",
+                       (msize >> 20), mstart);
+               memblock_reserve(mstart, msize);
+       }
+}
+
+/*
+ * Record one real memory region in fw_dump.rmr_src_addr[] and
+ * fw_dump.rmr_src_size[].
+ *
+ * Returns 1 on success, 0 if all MAX_REAL_MEM_REGIONS slots are in use.
+ */
+static int __init add_rmr_region(unsigned long rmr_start,
+                                unsigned long rmr_size)
+{
+       int i = fw_dump.rmr_regions_cnt;
+
+       /*
+        * Check before bumping the count so that rmr_regions_cnt never
+        * exceeds the array size, even when the region is rejected.
+        */
+       if (i >= MAX_REAL_MEM_REGIONS)
+               return 0;
+
+       pr_debug("Added real memory range[%d] [%#016lx-%#016lx)\n",
+                i, rmr_start, (rmr_start + rmr_size));
+       fw_dump.rmr_src_addr[i] = rmr_start;
+       fw_dump.rmr_src_size[i] = rmr_size;
+       fw_dump.rmr_regions_cnt++;
+       return 1;
+}
+
+/*
+ * Register [rmr_start, rmr_start + rmr_size) as real memory region(s).
+ *
+ * Platforms like PowerNV have an upper limit on the size of one entry.
+ * When fw_dump.max_copy_size is non-zero, the range is cut into pieces
+ * no bigger than that limit, one entry per piece.
+ *
+ * Returns the result of the last add_rmr_region() call, or 1 when
+ * rmr_size is zero and nothing had to be added.
+ */
+static int __init add_rmr_regions(unsigned long rmr_start,
+                                 unsigned long rmr_size)
+{
+       unsigned long chunk_limit = fw_dump.max_copy_size;
+       unsigned long cursor = rmr_start;
+       unsigned long remaining = rmr_size;
+       unsigned long chunk;
+       int added = 1;
+
+       /* No limit set: the whole range goes in as a single entry. */
+       if (!chunk_limit)
+               chunk_limit = rmr_size;
+
+       for (; remaining; remaining -= chunk, cursor += chunk) {
+               chunk = (remaining > chunk_limit) ? chunk_limit : remaining;
+
+               added = add_rmr_region(cursor, chunk);
+               if (!added)
+                       break;
         }
+
+       return added;
+}
+
+/*
+ * Populate fw_dump's real memory region list so that it covers
+ * fw_dump.boot_memory_size bytes taken from memory_ranges[], and
+ * accumulate the gaps skipped over in fw_dump.boot_memory_hole_size.
+ *
+ * Returns 1 on success, 0 if the regions do not fit in
+ * MAX_REAL_MEM_REGIONS entries.
+ */
+static int __init fadump_get_rmr_regions(void)
+{
+       int i, ret = 1;
+       unsigned long base, size, last_end;
+       unsigned long mem_size = fw_dump.boot_memory_size;
+
+       fw_dump.rmr_regions_cnt = 0;
+       fw_dump.boot_memory_hole_size = 0;
+
+       /*
+        * TODO: Extend support for multiple real memory regions on
+        *       pseries platform too.
+        */
+       if (fw_dump.fadump_platform == FADUMP_PLATFORM_PSERIES) {
+               ret = add_rmr_regions(RMA_START, fw_dump.boot_memory_size);
+               return ret;
+       }
+
+       /*
+        * Measure holes from RMA_START rather than from the first range:
+        * boot_memory_size + boot_memory_hole_size is used elsewhere as
+        * the address at which boot memory ends, so a gap below the first
+        * range has to be counted too.
+        */
+       last_end = RMA_START;
+       for (i = 0; i < memory_ranges_cnt; i++) {
+               base = memory_ranges[i].base;
+               size = memory_ranges[i].size;
+
+               if (base > last_end)
+                       fw_dump.boot_memory_hole_size += (base - last_end);
+
+               /* This range completes the boot memory: take what is left. */
+               if (size >= mem_size) {
+                       ret = add_rmr_regions(base, mem_size);
+                       break;
+               }
+
+               mem_size -= size;
+               ret = add_rmr_regions(base, size);
+               if (!ret)
+                       break;
+
+               last_end = base + size;
+       }
+
+       return ret;
 }
 
 int __init fadump_reserve_mem(void)
@@ -297,6 +553,10 @@ int __init fadump_reserve_mem(void)
                fw_dump.fadump_enabled = 0;
                return 0;
        }
+
+       fadump_get_reserved_ranges();
+       fadump_setup_memory_ranges();
+
        /*
         * Initialize boot memory size
         * If dump is active then we have already calculated the size during
@@ -311,6 +571,11 @@ int __init fadump_reserve_mem(void)
                                                 FADUMP_CMA_ALIGNMENT);
 #endif
                fw_dump.rmr_source_len = fw_dump.boot_memory_size;
+               if (!fadump_get_rmr_regions()) {
+                       fw_dump.fadump_enabled = 0;
+                       pr_err("Too many holes in boot memory area to enable 
fadump\n");
+                       return 0;
+               }
        }
 
        size = get_fadump_area_size();
@@ -335,6 +600,7 @@ int __init fadump_reserve_mem(void)
        else
                memory_boundary = memblock_end_of_DRAM();
 
+       base = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
        if (fw_dump.dump_active) {
 #ifdef CONFIG_HUGETLB_PAGE
                /*
@@ -348,11 +614,9 @@ int __init fadump_reserve_mem(void)
                 * If last boot has crashed then reserve all the memory
                 * above boot_memory_size so that we don't touch it until
                 * dump is written to disk by userspace tool. This memory
-                * will be released for general use once the dump is saved.
+                * can be released for general use by invalidating fadump.
                 */
-               base = fw_dump.boot_memory_size;
-               size = memory_boundary - base;
-               fadump_reserve_crash_area(base, size);
+               fadump_reserve_crash_area(base);
 
                fw_dump.fadumphdr_addr =
                        fadump_ops->get_meta_area_start(&fw_dump);
@@ -366,23 +630,22 @@ int __init fadump_reserve_mem(void)
                 * use memblock_find_in_range() here since it doesn't allocate
                 * from bottom to top.
                 */
-               for (base = fw_dump.boot_memory_size;
-                    base <= (memory_boundary - size);
-                    base += size) {
+               while (base <= (memory_boundary - size)) {
                        if (memblock_is_region_memory(base, size) &&
                            !memblock_is_region_reserved(base, size))
                                break;
+
+                       base += size;
                }
+
                if ((base > (memory_boundary - size)) ||
                    memblock_reserve(base, size)) {
                        pr_err("Failed to reserve memory\n");
                        return 0;
                }
 
-               pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
-                       "assisted dump (System RAM: %ldMB)\n",
-                       (unsigned long)(size >> 20),
-                       (unsigned long)(base >> 20),
+               pr_info("Reserved %ldMB of memory at %#016lx (System RAM: 
%ldMB)\n",
+                       (unsigned long)(size >> 20), base,
                        (unsigned long)(memblock_phys_mem_size() >> 20));
 
                fw_dump.reserve_dump_area_start = base;
@@ -543,23 +806,28 @@ static int fadump_init_elfcore_header(char *bufp)
  */
 static void fadump_setup_crash_memory_ranges(void)
 {
-       struct memblock_region *reg;
-       unsigned long long start, end;
+       unsigned long long start, end, offset;
+       int i;
 
        pr_debug("Setup crash memory ranges.\n");
        crash_mem_ranges = 0;
+       offset = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
+
        /*
-        * add the first memory chunk (RMA_START through boot_memory_size) as
-        * a separate memory chunk. The reason is, at the time crash firmware
-        * will move the content of this memory chunk to different location
-        * specified during fadump registration. We need to create a separate
-        * program header for this chunk with the correct offset.
+        * Add real memory region(s) whose content is going to be moved to
+        * a different location, specified during fadump registration, by
+        * firmware at the time of crash. We need to create separate program
+        * header(s) for this memory chunk with the correct offset.
         */
-       fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+       for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+               start = fw_dump.rmr_src_addr[i];
+               end = start + fw_dump.rmr_src_size[i];
+               fadump_add_crash_memory(start, end);
+       }
 
-       for_each_memblock(memory, reg) {
-               start = (unsigned long long)reg->base;
-               end = start + (unsigned long long)reg->size;
+       for (i = 0; i < memory_ranges_cnt; i++) {
+               start = memory_ranges[i].base;
+               end = start + memory_ranges[i].size;
 
                /*
                 * skip the first memory chunk that is already added (RMA_START
@@ -567,9 +835,9 @@ static void fadump_setup_crash_memory_ranges(void)
                 * when RMA_START changes to a non-zero value.
                 */
                BUILD_BUG_ON(RMA_START != 0);
-               if (start < fw_dump.boot_memory_size) {
-                       if (end > fw_dump.boot_memory_size)
-                               start = fw_dump.boot_memory_size;
+               if (start < offset) {
+                       if (end > offset)
+                               start = offset;
                        else
                                continue;
                }
@@ -586,17 +854,32 @@ static void fadump_setup_crash_memory_ranges(void)
  */
 static inline unsigned long fadump_relocate(unsigned long paddr)
 {
-       if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
-               return fw_dump.rmr_destination_addr + paddr;
-       else
-               return paddr;
+       unsigned long raddr, rstart, rend, offset;
+       int i;
+
+       /*
+        * 'offset' is the total size of the real memory regions that
+        * precede region 'i'. The ELF headers place region 'i' at
+        * rmr_destination_addr + offset, so an address inside it maps to
+        * that start plus its distance from the region's source base.
+        * Addresses outside every region are returned unchanged.
+        */
+       offset = 0;
+       raddr = paddr;
+       for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+               rstart = fw_dump.rmr_src_addr[i];
+               rend = rstart + fw_dump.rmr_src_size[i];
+
+               /* The first byte of a region belongs to it as well. */
+               if (paddr >= rstart && paddr < rend) {
+                       raddr = fw_dump.rmr_destination_addr + offset +
+                               (paddr - rstart);
+                       break;
+               }
+
+               offset += fw_dump.rmr_src_size[i];
+       }
+
+       return raddr;
 }
 
 static int fadump_create_elfcore_headers(char *bufp)
 {
        struct elfhdr *elf;
        struct elf_phdr *phdr;
-       int i;
+       unsigned long long raddr, offset;
+       int i, j;
 
        fadump_init_elfcore_header(bufp);
        elf = (struct elfhdr *)bufp;
@@ -639,9 +922,12 @@ static int fadump_create_elfcore_headers(char *bufp)
        (elf->e_phnum)++;
 
        /* setup PT_LOAD sections. */
-
+       j = 0;
+       offset = 0;
+       raddr = fw_dump.rmr_src_addr[0];
        for (i = 0; i < crash_mem_ranges; i++) {
                unsigned long long mbase, msize;
+
                mbase = crash_memory_ranges[i].base;
                msize = crash_memory_ranges[i].size;
 
@@ -654,13 +940,17 @@ static int fadump_create_elfcore_headers(char *bufp)
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = mbase;
 
-               if (mbase == RMA_START) {
+               if (mbase == raddr) {
                        /*
                         * The entire RMA region will be moved by firmware
                         * to the specified destination_address. Hence set
                         * the correct offset.
                         */
-                       phdr->p_offset = fw_dump.rmr_destination_addr;
+                       phdr->p_offset = fw_dump.rmr_destination_addr + offset;
+                       if (j < (fw_dump.rmr_regions_cnt - 1)) {
+                               offset += fw_dump.rmr_src_size[j];
+                               raddr = fw_dump.rmr_src_addr[++j];
+                       }
                }
 
                phdr->p_paddr = mbase;
@@ -707,6 +997,7 @@ static int register_fadump(void)
        if (!fw_dump.reserve_dump_area_size)
                return -ENODEV;
 
+       fadump_setup_memory_ranges();
        fadump_setup_crash_memory_ranges();
 
        addr = fadump_ops->get_meta_area_start(&fw_dump);
@@ -822,14 +1113,14 @@ static void fadump_invalidate_release_mem(void)
         * later for releasing the memory for general use.
         */
        reserved_area_start = fw_dump.reserve_dump_area_start;
-       reserved_area_end = reserved_area_start +
-                       fw_dump.reserve_dump_area_size;
+       reserved_area_end =
+               memory_limit ? memory_limit : memblock_end_of_DRAM();
+
        /*
-        * Setup reserve_dump_area_start and its size so that we can
-        * reuse this reserved memory for Re-registration.
+        * Setup reserve_dump_area_start so that we can reuse this
+        * reserved memory for Re-registration.
         */
        fw_dump.reserve_dump_area_start = destination_address;
-       fw_dump.reserve_dump_area_size = get_fadump_area_size();
 
        fadump_release_memory(reserved_area_start, reserved_area_end);
        if (fw_dump.cpu_notes_buf) {
diff --git a/arch/powerpc/kernel/fadump_internal.h 
b/arch/powerpc/kernel/fadump_internal.h
index 3791da7..eae4b55 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -49,6 +49,7 @@
 
 /* Firmware-Assited Dump platforms */
 #define FADUMP_PLATFORM_PSERIES                1
+#define FADUMP_PLATFORM_POWERNV                2
 
 #define FADUMP_CPU_ID_MASK             ((1UL << 32) - 1)
 
@@ -92,11 +93,14 @@ struct fadump_crash_info_header {
 /* Crash memory ranges */
 #define INIT_CRASHMEM_RANGES   (INIT_MEMBLOCK_REGIONS + 2)
 
-struct fad_crash_memory_ranges {
+struct fadump_memory_range {
        unsigned long long      base;
        unsigned long long      size;
 };
 
+/* Maximum no. of real memory regions supported by the kernel */
+#define MAX_REAL_MEM_REGIONS           6
+
 /* Firmware-assisted dump configuration details. */
 struct fw_dump {
        unsigned long   cpu_state_data_size;
@@ -114,6 +118,17 @@ struct fw_dump {
        unsigned long   rmr_source_len;
        unsigned long   rmr_destination_addr;
 
+       unsigned long   boot_memory_hole_size;
+       unsigned long   rmr_regions_cnt;
+       unsigned long   rmr_src_addr[MAX_REAL_MEM_REGIONS];
+       unsigned long   rmr_src_size[MAX_REAL_MEM_REGIONS];
+
+       /*
+        * Maximum size supported by firmware to copy from source to
+        * destination address per entry.
+        */
+       unsigned long   max_copy_size;
+
        int             ibm_configure_kernel_dump;
 
        unsigned long   fadump_enabled:1;
@@ -157,4 +172,14 @@ pseries_dt_scan_fadump(struct fw_dump *fadump_config, 
ulong node)
 }
 #endif
 
+/*
+ * PowerNV device tree scan hook, called for the "ibm,dump" node.
+ * The real implementation is only declared when CONFIG_PPC_POWERNV is
+ * set; otherwise the stub below is used.
+ */
+#ifdef CONFIG_PPC_POWERNV
+extern int powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node);
+#else
+/* Stub for kernels without PowerNV support: does nothing, returns 1. */
+static inline int
+powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
+{
+       /* NOTE(review): presumably 1 tells the DT scan to stop - confirm. */
+       return 1;
+}
+#endif
+
 #endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index 703a350..0d106b5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,6 +6,7 @@ obj-y                   += opal-msglog.o opal-hmi.o 
opal-power.o opal-irqchip.o
 obj-y                  += opal-kmsg.o opal-powercap.o opal-psr.o 
opal-sensor-groups.o
 
 obj-$(CONFIG_SMP)      += smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP)  += powernv_fadump.o
 obj-$(CONFIG_PCI)      += pci.o pci-ioda.o npu-dma.o
 obj-$(CONFIG_CXL_BASE) += pci-cxl.o
 obj-$(CONFIG_EEH)      += eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3da30c2..20bbb9c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache,         
OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,                     OPAL_NPU_TL_SET);
 OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,                
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,                
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_configure_fadump,               OPAL_CONFIGURE_FADUMP);
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c 
b/arch/powerpc/platforms/powernv/powernv_fadump.c
new file mode 100644
index 0000000..6d4b515
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -0,0 +1,337 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbath...@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "powernv fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+
+#include "../../kernel/fadump_internal.h"
+#include "powernv_fadump.h"
+
+/* Dump structure this kernel fills in and hands to firmware. */
+static struct powernv_fadump_mem_struct fdm;
+/*
+ * "result-table" found in the device tree when a dump is active; NULL
+ * otherwise and again after the dump has been invalidated.
+ */
+static const struct powernv_fadump_mem_struct *fdm_active;
+/*
+ * Size in bytes of the populated part of the dump structure, i.e. the
+ * whole structure minus its unused section slots. Kept file-local so this
+ * generic name does not leak into the kernel's global namespace.
+ */
+static unsigned long fdm_actual_size;
+
+/*
+ * Refresh the generic fadump configuration from a powernv dump structure.
+ *
+ * @fadump_conf: configuration to update.
+ * @fdm:         dump structure to read; its fields are big-endian.
+ *
+ * Stores the size of the populated part of @fdm in fdm_actual_size and the
+ * destination address of the first real memory region section (0 when there
+ * is none) in @fadump_conf. When a dump is active it also rebuilds the list
+ * of real memory regions, their total length and the size of the holes
+ * found before and between them.
+ */
+static void update_fadump_config(struct fw_dump *fadump_conf,
+                                const struct powernv_fadump_mem_struct *fdm)
+{
+       unsigned long base, size, last_end;
+       int section_cnt = be16_to_cpu(fdm->section_count);
+       int unused_sections = (POWERNV_MAX_SECTIONS - section_cnt);
+       int i, j;
+
+       pr_debug("section_cnt: %d\n", section_cnt);
+       /*
+        * section_count is read from the structure itself, so it cannot be
+        * trusted beyond the array: warn and clamp. Without the clamp the
+        * loops below would read past section[] and fdm_actual_size would
+        * end up larger than the structure.
+        */
+       if (WARN_ON(unused_sections < 0)) {
+               section_cnt = POWERNV_MAX_SECTIONS;
+               unused_sections = 0;
+       }
+       fdm_actual_size = sizeof(*fdm) -
+               (unused_sections * sizeof(struct powernv_fadump_section));
+
+       /*
+        * The first real memory region entry is the real memory
+        * regions destination address.
+        */
+       fadump_conf->rmr_destination_addr = 0;
+       for (i = 0; i < section_cnt; i++) {
+               if (fdm->section[i].src_type ==
+                   POWERNV_FADUMP_REAL_MODE_REGION) {
+                       fadump_conf->rmr_destination_addr =
+                               be64_to_cpu(fdm->section[i].dest_addr);
+                       break;
+               }
+       }
+       pr_debug("Destination address of real memory regions: %#016lx\n",
+                fadump_conf->rmr_destination_addr);
+
+       if (fadump_conf->dump_active) {
+               j = 0;
+               last_end = 0;
+               fadump_conf->rmr_source_len = 0;
+               fadump_conf->boot_memory_hole_size = 0;
+               for (i = 0; i < section_cnt; i++) {
+                       if (fdm->section[i].src_type !=
+                           POWERNV_FADUMP_REAL_MODE_REGION)
+                               continue;
+
+                       /*
+                        * POWERNV_MAX_SECTIONS allows for at most
+                        * MAX_REAL_MEM_REGIONS real memory regions; never
+                        * record more than that.
+                        * NOTE(review): assumes rmr_src_addr[] and
+                        * rmr_src_size[] hold MAX_REAL_MEM_REGIONS entries
+                        * - confirm against struct fw_dump.
+                        */
+                       if (j >= MAX_REAL_MEM_REGIONS) {
+                               pr_warn("Too many real memory regions, ignoring the rest\n");
+                               break;
+                       }
+
+                       base = be64_to_cpu(fdm->section[i].src_addr);
+                       size = be64_to_cpu(fdm->section[i].src_size);
+                       pr_debug("%d. RMR base: 0x%lx, size: 0x%lx\n",
+                                (i + 1), base, size);
+
+                       fadump_conf->rmr_src_addr[j] = base;
+                       fadump_conf->rmr_src_size[j] = size;
+                       fadump_conf->rmr_source_len += size;
+
+                       /* A gap since the previous region is a memory hole */
+                       if (base > last_end) {
+                               fadump_conf->boot_memory_hole_size +=
+                                       (base - last_end);
+                       }
+
+                       last_end = base + size;
+                       j++;
+               }
+               fadump_conf->rmr_regions_cnt = j;
+               pr_debug("Real memory regions count: %lu\n",
+                        fadump_conf->rmr_regions_cnt);
+       }
+}
+
+/*
+ * Prepare the dump structure that will be registered with firmware.
+ *
+ * Adds one real memory region section per region listed in @fadump_conf.
+ * Destination areas are laid out back to back starting at @addr, each as
+ * large as its source region. The section count is then stored and the
+ * configuration refreshed from the new structure.
+ *
+ * Returns the address just past the last destination area.
+ */
+static ulong powernv_init_fadump_mem_struct(struct fw_dump *fadump_conf,
+                                           ulong addr)
+{
+       struct powernv_fadump_section *sec;
+       int idx, nr_sections = 0;
+
+       fdm.section_size = cpu_to_be16(sizeof(struct powernv_fadump_section));
+
+       /* RMA region sections */
+       for (idx = 0; idx < fadump_conf->rmr_regions_cnt; idx++) {
+               sec = &fdm.section[RMR_REGION_INPUT_IDX + idx];
+
+               sec->src_type  = POWERNV_FADUMP_REAL_MODE_REGION;
+               sec->src_addr  = cpu_to_be64(fadump_conf->rmr_src_addr[idx]);
+               sec->dest_addr = cpu_to_be64(addr);
+               sec->dest_size = cpu_to_be64(fadump_conf->rmr_src_size[idx]);
+               sec->src_size  = sec->dest_size;
+
+               /* The next destination starts where this one ends */
+               addr += fadump_conf->rmr_src_size[idx];
+               nr_sections++;
+       }
+
+       fdm.section_count = cpu_to_be16(nr_sections);
+       update_fadump_config(fadump_conf, &fdm);
+
+       return addr;
+}
+
+/*
+ * Register the dump structure prepared in fdm with firmware.
+ *
+ * Returns 0 on success, -EEXIST when firmware answers OPAL_PERMISSION
+ * (treated as "already registered") and -EIO for every other status.
+ * dump_registered is set in the first two cases. OPAL_UNSUPPORTED also
+ * clears fadump_supported and fadump_enabled.
+ */
+static int powernv_register_fadump(struct fw_dump *fadump_conf)
+{
+       int rc, err = -EIO;
+
+       rc = opal_configure_fadump(FADUMP_REGISTER, &fdm, fdm_actual_size);
+       switch (rc) {
+       default:
+               pr_err("Failed to register. Unknown Error(%d).\n", rc);
+               break;
+       case OPAL_UNSUPPORTED:
+               pr_err("Support not available.\n");
+               fadump_conf->fadump_supported = 0;
+               fadump_conf->fadump_enabled = 0;
+               break;
+       case OPAL_INTERNAL_ERROR:
+               pr_err("Failed to register. Hardware Error(%d).\n", rc);
+               break;
+       case OPAL_PARAMETER:
+               pr_err("Failed to register. Parameter Error(%d).\n", rc);
+               break;
+       case OPAL_PERMISSION:
+               pr_err("Already registered!\n");
+               fadump_conf->dump_registered = 1;
+               err = -EEXIST;
+               break;
+       case OPAL_SUCCESS:
+               /* Success is not an error condition: log at info level */
+               pr_info("Registration is successful!\n");
+               fadump_conf->dump_registered = 1;
+               err = 0;
+               break;
+       }
+
+       return err;
+}
+
+/*
+ * Ask firmware to drop the current dump registration.
+ *
+ * Returns 0 and clears dump_registered on success, or -EIO when the OPAL
+ * call reports any non-zero status.
+ */
+static int powernv_unregister_fadump(struct fw_dump *fadump_conf)
+{
+       int rc;
+
+       rc = opal_configure_fadump(FADUMP_UNREGISTER, &fdm, fdm_actual_size);
+       if (rc) {
+               pr_err("Failed to un-register - unexpected Error(%d).\n", rc);
+               return -EIO;
+       }
+
+       fadump_conf->dump_registered = 0;
+       return 0;
+}
+
+/*
+ * Start of the preserved area: the destination address recorded for the
+ * real memory regions.
+ */
+static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf)
+{
+       return fadump_conf->rmr_destination_addr;
+}
+
+/*
+ * Start of the meta area: the real memory regions' destination address
+ * plus their total source length.
+ */
+static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf)
+{
+       return (fadump_conf->rmr_destination_addr +
+               fadump_conf->rmr_source_len);
+}
+
+/*
+ * Ask firmware to invalidate the active dump.
+ *
+ * Returns 0 on success, after clearing dump_active and forgetting the
+ * active dump structure, or -EIO when the OPAL call reports any non-zero
+ * status.
+ */
+static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
+{
+       int rc;
+
+       /* fdm_active is const-qualified; the cast drops the qualifier */
+       rc = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active,
+                                  fdm_actual_size);
+       if (rc) {
+               pr_err("Failed to invalidate - unexpected Error(%d).\n", rc);
+               return -EIO;
+       }
+
+       fadump_conf->dump_active = 0;
+       fdm_active = NULL;
+       return 0;
+}
+
+/*
+ * Build the ELF CPU notes for the dump being processed.
+ *
+ * Only the crashing CPU is covered for now. Its register state is taken
+ * from the fadump crash info structure populated by the first kernel at the
+ * time of crash. No CPU state data captured by firmware is parsed here, so
+ * no notes are produced for any other CPU and the buffer is sized for a
+ * single note.
+ *
+ * Returns 0 on success, including when there is no crash info header or the
+ * crashing CPU is unknown, or -ENOMEM if the notes buffer cannot be
+ * allocated.
+ */
+static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+       u32 num_cpus = 1, *note_buf;
+       struct fadump_crash_info_header *fdh = NULL;
+
+       /* Allocate buffer to hold cpu crash notes. */
+       fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+       fadump_conf->cpu_notes_buf_size =
+               PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
+       note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
+       if (!note_buf) {
+               pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
+                      fadump_conf->cpu_notes_buf_size);
+               return -ENOMEM;
+       }
+       fadump_conf->cpu_notes_buf = __pa(note_buf);
+
+       /* The size expression is a size_t, hence %zu */
+       pr_debug("Allocated buffer for cpu notes of size %zu at %p\n",
+                (num_cpus * sizeof(note_buf_t)), note_buf);
+
+       if (fadump_conf->fadumphdr_addr)
+               fdh = __va(fadump_conf->fadumphdr_addr);
+
+       if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
+               note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+               final_note(note_buf);
+
+               pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+                        fdh->elfcorehdr_addr);
+               fadump_update_elfcore_header(fadump_conf,
+                                            __va(fdh->elfcorehdr_addr));
+       }
+
+       return 0;
+}
+
+/*
+ * Validate the active dump and make its elfcore header available.
+ *
+ * Returns -EINVAL when no dump is active, when no crash info header address
+ * is known or when the header's magic number is wrong. Also returns the
+ * error from building the CPU notes, if any. On success elfcorehdr_addr is
+ * set from the crash info header and 0 is returned.
+ */
+static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
+{
+       struct fadump_crash_info_header *hdr;
+       int ret;
+
+       if (!(fdm_active && fadump_conf->fadumphdr_addr))
+               return -EINVAL;
+
+       /* The crash info header must carry the expected magic number */
+       hdr = __va(fadump_conf->fadumphdr_addr);
+       if (hdr->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+               pr_err("Crash info header is not valid.\n");
+               return -EINVAL;
+       }
+
+       /*
+        * TODO: To build cpu notes, find a way to map PIR to logical id.
+        *       Also, we may need different method for pseries and powernv.
+        *       The currently booted kernel could have a different PIR to
+        *       logical id mapping. So, try saving info of previous kernel's
+        *       paca to get the right PIR to logical id mapping.
+        */
+       ret = fadump_build_cpu_notes(fadump_conf);
+       if (ret)
+               return ret;
+
+       /*
+        * Validation is complete and the elfcore header is ready. Publishing
+        * its address in elfcorehdr_addr lets the vmcore module export it
+        * through '/proc/vmcore'.
+        */
+       elfcorehdr_addr = hdr->elfcorehdr_addr;
+
+       return 0;
+}
+
+/* Region display callback; deliberately empty, so it writes nothing to @m. */
+static void powernv_fadump_region_show(struct fw_dump *fadump_conf,
+                                      struct seq_file *m)
+{
+}
+
+/*
+ * Request an OPAL_REBOOT_MPIPL reboot from firmware, passing @msg along.
+ * If the call returns, the two recognised failure codes are reported and
+ * any other status is ignored.
+ */
+static void powernv_crash_fadump(const char *msg)
+{
+       int rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+
+       switch (rc) {
+       case OPAL_UNSUPPORTED:
+               pr_emerg("Reboot type %d not supported\n", OPAL_REBOOT_MPIPL);
+               break;
+       case OPAL_HARDWARE:
+               pr_emerg("No backend support for MPIPL!\n");
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * powernv implementations of the fadump operations. Installed as fadump_ops
+ * by powernv_dt_scan_fadump().
+ */
+static struct fadump_ops_t powernv_fadump_ops = {
+       .init_fadump_mem_struct = powernv_init_fadump_mem_struct,
+       .register_fadump        = powernv_register_fadump,
+       .unregister_fadump      = powernv_unregister_fadump,
+       .get_preserv_area_start = powernv_get_preserv_area_start,
+       .get_meta_area_start    = powernv_get_meta_area_start,
+       .invalidate_fadump      = powernv_invalidate_fadump,
+       .process_fadump         = powernv_process_fadump,
+       .fadump_region_show     = powernv_fadump_region_show,
+       .crash_fadump           = powernv_crash_fadump,
+};
+
+/*
+ * Set up powernv fadump support from the flat device tree node @node.
+ *
+ * Caps max_copy_size, checks the "result-table" property to see whether a
+ * dump is active (updating @fadump_conf from it if so), then installs the
+ * powernv operations and marks fadump as supported on the powernv platform.
+ *
+ * Always returns 1.
+ */
+int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
+{
+       /*
+        * Firmware currently supports only 32-bit value for size,
+        * align it to 1MB size.
+        */
+       fadump_conf->max_copy_size = _ALIGN_DOWN(0xFFFFFFFF, (1 << 20));
+
+       /*
+        * Check if dump has been initiated on last reboot.
+        */
+       fdm_active = of_get_flat_dt_prop(node, "result-table", NULL);
+       if (fdm_active) {
+               pr_info("Firmware-assisted dump is active.\n");
+               fadump_conf->dump_active = 1;
+               /*
+                * NOTE(review): update_fadump_config() dereferences its
+                * argument. Confirm that the __pa() of the property pointer
+                * is the address intended here rather than fdm_active
+                * itself.
+                */
+               update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+       }
+
+       fadump_ops = &powernv_fadump_ops;
+       fadump_conf->fadump_supported = 1;
+       fadump_conf->fadump_platform = FADUMP_PLATFORM_POWERNV;
+
+       return 1;
+}
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h
new file mode 100644
index 0000000..224a142
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.h
@@ -0,0 +1,63 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbath...@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_POWERNV_FA_DUMP_H__
+#define __PPC64_POWERNV_FA_DUMP_H__
+
+/*
+ * Source types carried in a dump section's src_type field. Each class of
+ * memory owns a range of values; each define is the first value of its
+ * range.
+ */
+#define POWERNV_FADUMP_CPU_STATE_DATA  0x0000
+/* OPAL : 0x01 - 0x39 */
+#define POWERNV_FADUMP_OPAL_REGION     0x0001
+/* Firmware/SMF : 0x40 - 0x79 */
+#define POWERNV_FADUMP_FW_REGION       0x0040
+/* Kernel memory region : 0x80 - 0xb9 */
+#define POWERNV_FADUMP_REAL_MODE_REGION        0x0080
+/* Reserved for future use : 0xc0 - 0xff */
+#define POWERNV_FADUMP_RESERVED_REGION 0x00c0
+
+/*
+ * Kinds of dump section. POWERNV_SECTIONS comes last and therefore equals
+ * the number of kinds; POWERNV_MAX_SECTIONS is derived from it.
+ */
+enum powernv_fadump_section_types {
+       CPU_STATE_TYPE          = 0,
+       OPAL_REGION_TYPE,
+       FW_REGION_TYPE,
+       RMR_REGION_TYPE,
+       POWERNV_SECTIONS
+};
+
+/* Starting index of RMR region in dump sections while registering */
+#define RMR_REGION_INPUT_IDX           0
+
+/*
+ * Capacity of the section array: one slot per section kind, with the single
+ * RMR slot widened to MAX_REAL_MEM_REGIONS slots (hence the "- 1").
+ */
+#define POWERNV_MAX_SECTIONS           (POWERNV_SECTIONS + \
+                                        MAX_REAL_MEM_REGIONS - 1)
+
+/*
+ * Kernel Dump section info: one source range and the destination range it
+ * is associated with. Multi-byte fields are big-endian.
+ */
+struct powernv_fadump_section {
+       u8      src_type;       /* one of the POWERNV_FADUMP_* source types */
+       u8      reserved[7];
+       __be64  src_addr;       /* start of the source range */
+       __be64  src_size;       /* length of the source range */
+       __be64  dest_addr;      /* start of the destination range */
+       __be64  dest_size;      /* length of the destination range */
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through opal call.
+ * Only the first section_count entries of section[] are in use.
+ */
+struct powernv_fadump_mem_struct {
+
+       __be16  section_size;           /* sizeof(struct powernv_fadump_section) */
+       __be16  section_count;          /* number of sections */
+       __be32  reserved;
+
+       struct powernv_fadump_section   section[POWERNV_MAX_SECTIONS];
+};
+
+#endif /* __PPC64_POWERNV_FA_DUMP_H__ */
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c
index ac54501..ef7e59a 100644
--- a/arch/powerpc/platforms/pseries/pseries_fadump.c
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.c
@@ -40,8 +40,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
                be64_to_cpu(fdm->rmr_region.destination_address);
 
        if (fadump_conf->dump_active) {
-               fadump_conf->rmr_source_len =
-                       be64_to_cpu(fdm->rmr_region.source_len);
+               fadump_conf->rmr_src_addr[0] =
+                       be64_to_cpu(fdm->rmr_region.source_address);
+               fadump_conf->rmr_src_size[0] = be64_to_cpu(fdm->rmr_region.source_len);
+               fadump_conf->rmr_regions_cnt = 1;
+               fadump_conf->rmr_source_len = fadump_conf->rmr_src_size[0];
+               fadump_conf->boot_memory_hole_size = 0;
        }
 }
 

Reply via email to