On 15/12/23 06:53, Baoquan He wrote:
On 12/11/23 at 02:00pm, Sourabh Jain wrote:
......
diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e87..802abf580cf0 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
  void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
  struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
  int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
  int add_tce_mem_ranges(struct crash_mem **mem_ranges);
  int add_initrd_mem_range(struct crash_mem **mem_ranges);
  #ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 9932793cd64b..5be30659172f 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -19,8 +19,11 @@
  #include <linux/of.h>
  #include <linux/libfdt.h>
  #include <linux/memblock.h>
+#include <linux/memory.h>
#include <asm/page.h>
+#include <asm/drmem.h>
+#include <asm/mmzone.h>
  #include <asm/current.h>
  #include <asm/machdep.h>
  #include <asm/cacheflush.h>
@@ -547,9 +550,7 @@ int update_cpus_node(void *fdt)
  #undef pr_fmt
  #define pr_fmt(fmt) "crash hp: " fmt
-#ifdef CONFIG_HOTPLUG_CPU
- /* Provides the value for the sysfs crash_hotplug nodes */
-int arch_crash_hotplug_cpu_support(struct kimage *image)
+static int crash_hotplug_support(struct kimage *image)
  {
        if (image->file_mode)
                return 1;
@@ -560,8 +561,118 @@ int arch_crash_hotplug_cpu_support(struct kimage *image)
         */
        return image->update_elfcorehdr && image->update_fdt;
  }
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /* Provides the value for the sysfs crash_hotplug nodes */
+int arch_crash_hotplug_cpu_support(struct kimage *image)
+{
+       return crash_hotplug_support(image);
+}
+#endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* Provides the value for the sysfs memory_hotplug nodes */
+int arch_crash_hotplug_memory_support(struct kimage *image)
+{
+       return crash_hotplug_support(image);
+}
  #endif
+/*
+ * Advertise preferred elfcorehdr size to userspace via
+ * /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+       unsigned int sz;
+       unsigned long elf_phdr_cnt;
+
+       /* Program header for CPU notes and vmcoreinfo */
+       elf_phdr_cnt = 2;
+       if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+               /* In the worst case, a Phdr is needed for every other LMB to be
+                * represented as an individual crash range.
+                */
+               elf_phdr_cnt += memory_hotplug_max() / (2 * drmem_lmb_size());
+
+       /* Do not cross the max limit */
+       if (elf_phdr_cnt > PN_XNUM)
+               elf_phdr_cnt = PN_XNUM;
+
+       sz = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(Elf64_Phdr));
+       return sz;
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ *                            elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify 
*mn)
+{
+       int ret;
+       struct crash_mem *cmem = NULL;
+       struct kexec_segment *ksegment;
+       void *ptr, *mem, *elfbuf = NULL;
+       unsigned long elfsz, memsz, base_addr, size;
+
+       ksegment = &image->segment[image->elfcorehdr_index];
+       mem = (void *) ksegment->mem;
+       memsz = ksegment->memsz;
+
+       ret = get_crash_memory_ranges(&cmem);
+       if (ret) {
+               pr_err("Failed to get crash mem range\n");
+               return;
+       }
+
+       /*
+        * The hot unplugged memory is part of crash memory ranges,
+        * remove it here.
+        */
+       if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+               base_addr = PFN_PHYS(mn->start_pfn);
+               size = mn->nr_pages * PAGE_SIZE;
+               ret = remove_mem_range(&cmem, base_addr, size);
Although this is ppc specific, I don't understand: why don't you simply
recreate the elfcorehdr, instead of explicitly removing the hot-removed
region? Comparing the remove_mem_range() implementation with recreating,
I don't see much benefit from it, and it makes your code more complicated.
Just curious; surely the ppc people can decide which approach should be taken.

I am recreating `elfcorehdr` by calling `crash_prepare_elf64_headers()` below.

This complexity is necessary to avoid adding hot-removed memory to the
new `elfcorehdr`.

On powerpc, the memblock list is used to prepare the `elfcorehdr`. In the
case of memory hot removal, the memblock list is updated only after the arch
crash hotplug handler has been triggered, so when the handler recreates the
`elfcorehdr` the list still contains the hot-removed memory. Thus, the
hot-removed memory is explicitly removed from the crash memory ranges to
ensure that the memory ranges added to the `elfcorehdr` do not include it.

Thanks,
Sourabh Jain


+               if (ret) {
+                       pr_err("Failed to remove hot-unplugged from crash memory 
ranges.\n");
+                       return;
+               }
+       }
+
+       ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+       if (ret) {
+               pr_err("Failed to prepare elf header\n");
+               return;
+       }
+
+       /*
+        * It is unlikely that kernel hit this because elfcorehdr kexec
+        * segment (memsz) is built with addition space to accommodate growing
+        * number of crash memory ranges while loading the kdump kernel. It is
+        * Just to avoid any unforeseen case.
+        */
+       if (elfsz > memsz) {
+               pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, 
memsz);
+               goto out;
+       }
+
+       ptr = __va(mem);
+       if (ptr) {
+               /* Temporarily invalidate the crash image while it is replaced 
*/
+               xchg(&kexec_crash_image, NULL);
+
+               /* Replace the old elfcorehdr with newly prepared elfcorehdr */
+               memcpy((void *)ptr, elfbuf, elfsz);
+
+               /* The crash image is now valid once again */
+               xchg(&kexec_crash_image, image);
+       }
+out:
+       vfree(elfbuf);
+}
+
  /**
   * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events 
to update the
   *                                 necessary kexec segments based on the 
hotplug event.
@@ -572,7 +683,7 @@ int arch_crash_hotplug_cpu_support(struct kimage *image)
   * CPU addition: Update the FDT segment to include the newly added CPU.
   * CPU removal: No action is needed, with the assumption that it's okay to 
have offline CPUs
   *            as part of the FDT.
- * Memory addition/removal: No action is taken as this is not yet supported.
+ * Memory addition/removal: Recreate the elfcorehdr segment
   */
  void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
  {
@@ -593,7 +704,6 @@ void arch_crash_handle_hotplug_event(struct kimage *image, 
void *arg)
                return;
} else if (hp_action == KEXEC_CRASH_HP_ADD_CPU) {
-
                void *fdt, *ptr;
                unsigned long mem;
                int i, fdt_index = -1;
@@ -628,8 +738,10 @@ void arch_crash_handle_hotplug_event(struct kimage *image, 
void *arg)
} else if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY ||
                   hp_action == KEXEC_CRASH_HP_ADD_MEMORY) {
-               pr_info_once("Crash update is not supported for memory 
hotplug\n");
-               return;
+               struct memory_notify *mn;
+
+               mn = (struct memory_notify *)arg;
+               update_crash_elfcorehdr(image, mn);
        }
  }
  #endif
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index e2148a009701..2457d7ec2075 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -21,6 +21,8 @@
  #include <linux/memblock.h>
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
+#include <linux/elf.h>
+
  #include <asm/setup.h>
  #include <asm/cputhreads.h>
  #include <asm/drmem.h>
@@ -740,7 +742,35 @@ static int load_elfcorehdr_segment(struct kimage *image, 
struct kexec_buf *kbuf)
kbuf->buffer = headers;
        kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
-       kbuf->bufsz = kbuf->memsz = headers_sz;
+       kbuf->bufsz = headers_sz;
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+       /* Adjust the elfcorehdr segment size to accommodate
+        * future crash memory ranges.
+        */
+       int max_lmb;
+       unsigned long pnum;
+
+       /* In the worst case, a Phdr is needed for every other LMB to be
+        * represented as an individual crash range.
+        */
+       max_lmb = memory_hotplug_max() / (2 * drmem_lmb_size());
+
+       /* Do not cross the Phdr max limit of the elf header.
+        * Avoid counting Phdr for crash ranges (cmem->nr_ranges)
+        * which are already part of elfcorehdr.
+        */
+       if (max_lmb > PN_XNUM)
+               pnum = PN_XNUM - cmem->nr_ranges;
+       else
+               pnum = max_lmb - cmem->nr_ranges;
+
+       /* Additional buffer space for elfcorehdr to accommodate
+        * future memory ranges.
+        */
+       kbuf->memsz = headers_sz + pnum * sizeof(Elf64_Phdr);
+#else
+       kbuf->memsz = headers_sz;
+#endif
        kbuf->top_down = false;
ret = kexec_add_buffer(kbuf);
@@ -750,7 +780,7 @@ static int load_elfcorehdr_segment(struct kimage *image, 
struct kexec_buf *kbuf)
        }
image->elf_load_addr = kbuf->mem;
-       image->elf_headers_sz = headers_sz;
+       image->elf_headers_sz = kbuf->memsz;
        image->elf_headers = headers;
  out:
        kfree(cmem);
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index fb3e12f15214..4fd0c5d5607b 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -234,6 +234,91 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, 
u64 size)
        return __add_mem_range(mem_ranges, base, size);
  }
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges:    Range list to remove the memory range to.
+ * @base:          Base address of the range to remove.
+ * @size:          Size of the memory range to remove.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+       u64 end;
+       int ret = 0;
+       unsigned int i;
+       u64 mstart, mend;
+       struct crash_mem *mem_rngs = *mem_ranges;
+
+       if (!size)
+               return 0;
+
+       /*
+        * Memory range are stored as start and end address, use
+        * the same format to do remove operation.
+        */
+       end = base + size - 1;
+
+       for (i = 0; i < mem_rngs->nr_ranges; i++) {
+               mstart = mem_rngs->ranges[i].start;
+               mend = mem_rngs->ranges[i].end;
+
+               /*
+                * Memory range to remove is not part of this range entry
+                * in the memory range list
+                */
+               if (!(base >= mstart && end <= mend))
+                       continue;
+
+               /*
+                * Memory range to remove is equivalent to this entry in the
+                * memory range list. Remove the range entry from the list.
+                */
+               if (base == mstart && end == mend) {
+                       for (; i < mem_rngs->nr_ranges - 1; i++) {
+                               mem_rngs->ranges[i].start = 
mem_rngs->ranges[i+1].start;
+                               mem_rngs->ranges[i].end = 
mem_rngs->ranges[i+1].end;
+                       }
+                       mem_rngs->nr_ranges--;
+                       goto out;
+               }
+               /*
+                * Start address of the memory range to remove and the
+                * current memory range entry in the list is same. Just
+                * move the start address of the current memory range
+                * entry in the list to end + 1.
+                */
+               else if (base == mstart) {
+                       mem_rngs->ranges[i].start = end + 1;
+                       goto out;
+               }
+               /*
+                * End address of the memory range to remove and the
+                * current memory range entry in the list is same.
+                * Just move the end address of the current memory
+                * range entry in the list to base - 1.
+                */
+               else if (end == mend)  {
+                       mem_rngs->ranges[i].end = base - 1;
+                       goto out;
+               }
+               /*
+                * Memory range to remove is not at the edge of current
+                * memory range entry. Split the current memory entry into
+                * two half.
+                */
+               else {
+                       mem_rngs->ranges[i].end = base - 1;
+                       size = mem_rngs->ranges[i].end - end;
+                       ret = add_mem_range(mem_ranges, end + 1, size);
+               }
+       }
+out:
+       return ret;
+}
+
  /**
   * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
   * @mem_ranges:         Range list to add the memory range(s) to.
--
2.41.0


Reply via email to