From: Cong Wang <[email protected]>

This commit introduces:

* Multikernel flag support in kexec_file_load by adding KEXEC_MULTIKERNEL
  to KEXEC_FILE_FLAGS, enabling user-space to specify multikernel
  operations through the file-based kexec interface with proper flag
  validation and ID extraction.

* Instance-based memory allocation for multikernel images through
  kexec_alloc_multikernel() that allocates kernel segments from
  instance-specific memory pools rather than system memory, ensuring
  compliance with device tree resource specifications.

* Multikernel control page allocation via kimage_alloc_multikernel_control_pages()
  that provides page-aligned control structures from instance pools
  with proper alignment validation and conflict detection against
  existing segments.

* Enhanced kimage_file_alloc_init() with multikernel instance association
  that extracts multikernel IDs from kexec flags, validates instance
  availability, establishes bidirectional cross-references, and updates
  instance states to LOADING during the load process.

* Integrated memory hole location in kexec_locate_mem_hole() that
  prioritizes multikernel instance pool allocation over system memory
  allocation, ensuring multikernel segments respect reserved memory
  boundaries and resource isolation.

The integration maintains compatibility with existing kexec_file_load()
use cases, such as crash kernel loading, while extending it for the
multikernel case.
Standard kexec operations continue to use system memory allocation, while
multikernel operations automatically use instance-specific pools when
the KEXEC_MULTIKERNEL flag is specified.

This enables user-space tools to load multikernel images using the
more secure and flexible kexec_file_load interface rather than the
legacy kexec_load syscall, providing better integration with modern
security frameworks and signed kernel verification.
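
For illustration only, a user-space loader might look roughly like the
sketch below (not part of this patch).  The KEXEC_MULTIKERNEL value, the
bit position used to encode the instance ID, and the kernel image path
are placeholders, and the instance with that ID is assumed to have been
created and its memory reserved beforehand:

  /*
   * Illustrative only, not part of this patch.  KEXEC_MULTIKERNEL's
   * value and MK_ID_SHIFT are placeholders for whatever the
   * multikernel UAPI in this series actually defines.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <linux/kexec.h>

  #ifndef KEXEC_MULTIKERNEL
  #define KEXEC_MULTIKERNEL 0x00000040UL   /* placeholder value */
  #endif
  #define MK_ID_SHIFT 16                   /* placeholder ID encoding */

  int main(void)
  {
          const char *cmdline = "console=ttyS0";
          unsigned long mk_id = 1;  /* instance created beforehand */
          unsigned long flags = KEXEC_MULTIKERNEL | (mk_id << MK_ID_SHIFT) |
                                KEXEC_FILE_NO_INITRAMFS;
          int kernel_fd = open("/boot/spawn-kernel", O_RDONLY);

          if (kernel_fd < 0)
                  return 1;

          /* args: kernel_fd, initrd_fd, cmdline_len (incl. NUL), cmdline, flags */
          if (syscall(SYS_kexec_file_load, kernel_fd, -1,
                      strlen(cmdline) + 1, cmdline, flags) < 0) {
                  perror("kexec_file_load");
                  return 1;
          }
          return 0;
  }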

Signed-off-by: Cong Wang <[email protected]>
---
 include/linux/kexec.h |   3 +-
 kernel/kexec_core.c   |  61 ++++++++++++++++++++++
 kernel/kexec_file.c   | 116 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 5e9e9ad1dfeb..b907b7a92fd2 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -477,7 +477,8 @@ bool kexec_load_permitted(int kexec_image_type);
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS       (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
                                 KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
-                                KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB)
+                                KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB | \
+                                KEXEC_MULTIKERNEL)
 
 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 7db755e64dd6..61ad01acd034 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -476,6 +476,64 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 }
 #endif
 
+static struct page *kimage_alloc_multikernel_control_pages(struct kimage *image,
+                                                          unsigned int order)
+{
+       /* Control pages for multikernel must be allocated from the instance's
+        * memory pool to ensure they stay within the reserved memory regions
+        * specified in the device tree configuration.
+        *
+        * We use mk_kimage_alloc() to get memory from the instance pool,
+        * then convert it to page structures.
+        */
+       void *virt_addr;
+       phys_addr_t phys_addr;
+       struct page *pages;
+       unsigned long size;
+       unsigned int count;
+
+       if (!image->mk_instance) {
+               pr_err("Multikernel image has no associated instance\n");
+               return NULL;
+       }
+
+       count = 1 << order;
+       size = count << PAGE_SHIFT;
+
+       /* Allocate from the multikernel instance pool (page aligned) */
+       virt_addr = mk_kimage_alloc(image, size, PAGE_SIZE);
+       if (!virt_addr) {
+               pr_debug("Failed to allocate %lu bytes for multikernel control pages\n", size);
+               return NULL;
+       }
+
+       /* Convert virtual address to physical */
+       phys_addr = virt_to_phys(virt_addr);
+
+       /* Check alignment requirements - control pages need page alignment */
+       if (!IS_ALIGNED(phys_addr, PAGE_SIZE)) {
+               pr_err("Multikernel control page allocation not page-aligned: phys=0x%llx\n",
+                      (unsigned long long)phys_addr);
+               mk_kimage_free(image, virt_addr, size);
+               return NULL;
+       }
+
+       /* Get the page structure */
+       pages = virt_to_page(virt_addr);
+
+       /* Check for conflicts with existing segments */
+       if (kimage_is_destination_range(image, phys_addr, phys_addr + size - 1)) {
+               pr_debug("Multikernel control pages conflict with existing segments: 0x%llx+0x%lx\n",
+                        (unsigned long long)phys_addr, size);
+               mk_kimage_free(image, virt_addr, size);
+               return NULL;
+       }
+
+       pr_debug("Allocated multikernel control pages: order=%u, phys=0x%llx, virt=%px\n",
+                order, (unsigned long long)phys_addr, virt_addr);
+
+       return pages;
+}
 
 struct page *kimage_alloc_control_pages(struct kimage *image,
                                         unsigned int order)
@@ -491,6 +549,9 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
                pages = kimage_alloc_crash_control_pages(image, order);
                break;
 #endif
+       case KEXEC_TYPE_MULTIKERNEL:
+               pages = kimage_alloc_multikernel_control_pages(image, order);
+               break;
        }
 
        return pages;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 2d9d5626c8da..f9979c1d9f9e 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -27,6 +27,7 @@
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-map-ops.h>
+#include <linux/multikernel.h>
 #include "kexec_internal.h"
 
 #ifdef CONFIG_KEXEC_SIG
@@ -309,6 +310,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
+       bool multikernel_load = flags & KEXEC_MULTIKERNEL;
 
        image = do_kimage_alloc_init();
        if (!image)
@@ -322,8 +324,50 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
-       }
+       } else
 #endif
+       if (multikernel_load) {
+               struct mk_instance *instance;
+               int mk_id = KEXEC_GET_MK_ID(flags);
+
+               /* Set multikernel image type for proper memory allocation */
+               image->type = KEXEC_TYPE_MULTIKERNEL;
+
+               pr_info("kexec_file_load: multikernel load - flags=0x%lx, extracted mk_id=%d\n",
+                       flags, mk_id);
+
+               if (mk_id <= 0) {
+                       pr_err("Invalid multikernel ID %d in flags\n", mk_id);
+                       ret = -EINVAL;
+                       goto out_free_image;
+               }
+
+               /* Find the existing mk_instance */
+               instance = mk_instance_find(mk_id);
+               if (!instance) {
+                       pr_err("No multikernel instance found with ID %d\n", mk_id);
+                       ret = -ENOENT;
+                       goto out_free_image;
+               }
+
+               /* Check if instance is already associated with a kimage */
+               if (instance->kimage) {
+                       pr_err("Multikernel instance %d already has an associated kimage\n", mk_id);
+                       mk_instance_put(instance);
+                       ret = -EBUSY;
+                       goto out_free_image;
+               }
+
+               /* Establish cross-references */
+               image->mk_instance = instance; /* Transfer reference from find */
+               image->mk_id = mk_id;
+               instance->kimage = image;
+
+               /* Update instance state */
+               mk_instance_set_state(instance, MK_STATE_LOADING);
+
+               pr_info("Associated kimage with multikernel instance %d\n", mk_id);
+       }
 
        ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
                                           cmdline_ptr, cmdline_len, flags);
@@ -731,6 +775,61 @@ static int kexec_alloc_contig(struct kexec_buf *kbuf)
        return 0;
 }
 
+static int kexec_alloc_multikernel(struct kexec_buf *kbuf)
+{
+       void *virt_addr;
+       phys_addr_t phys_addr;
+
+       pr_info("kexec_alloc_multikernel: called for segment size=0x%lx, buf_min=0x%lx, buf_max=0x%lx, align=0x%lx\n",
+               kbuf->memsz, kbuf->buf_min, kbuf->buf_max, kbuf->buf_align);
+
+       /* Check if this is a multikernel image with an associated instance */
+       if (!kbuf->image->mk_instance || kbuf->image->type != KEXEC_TYPE_MULTIKERNEL) {
+               pr_info("kexec_alloc_multikernel: not a multikernel image (mk_instance=%p, type=%d)\n",
+                       kbuf->image->mk_instance, kbuf->image->type);
+               return -EPERM;
+       }
+
+       /* Allocate from the multikernel instance pool using the proper API */
+       virt_addr = mk_kimage_alloc(kbuf->image, kbuf->memsz, kbuf->buf_align);
+       if (!virt_addr) {
+               pr_info("Failed to allocate %lu bytes from multikernel instance pool (align=0x%lx)\n",
+                       kbuf->memsz, kbuf->buf_align);
+               return -ENOMEM;
+       }
+
+       /* Convert virtual address to physical */
+       phys_addr = virt_to_phys(virt_addr);
+
+       if (!IS_ALIGNED(phys_addr, kbuf->buf_align)) {
+               pr_info("Multikernel allocation not aligned: phys=0x%llx, required=0x%lx\n",
+                        (unsigned long long)phys_addr, kbuf->buf_align);
+               mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz);
+               return -ENOMEM;
+       }
+
+       if (phys_addr < kbuf->buf_min || (phys_addr + kbuf->memsz - 1) > kbuf->buf_max) {
+               pr_info("Multikernel allocation out of bounds: phys=0x%llx, min=0x%lx, max=0x%lx\n",
+                        (unsigned long long)phys_addr, kbuf->buf_min, kbuf->buf_max);
+               mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz);
+               return -ENOMEM;
+       }
+
+       if (kimage_is_destination_range(kbuf->image, phys_addr, phys_addr + kbuf->memsz - 1)) {
+               pr_info("Multikernel allocation conflicts with existing segments: 0x%llx+0x%lx\n",
+                        (unsigned long long)phys_addr, kbuf->memsz);
+               mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz);
+               return -EBUSY;
+       }
+
+       kbuf->mem = phys_addr;
+
+       pr_info("Allocated %lu bytes from multikernel pool at 0x%llx (virt=%px)\n",
+                kbuf->memsz, (unsigned long long)phys_addr, virt_addr);
+
+       return 0;
+}
+
 /**
  * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
  * @kbuf:      Parameters for the memory search.
@@ -743,8 +842,21 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
 {
        int ret;
 
+       pr_info("kexec_locate_mem_hole: called for segment size=0x%lx, mem=0x%lx, image_type=%d\n",
+               kbuf->memsz, kbuf->mem, kbuf->image->type);
+
        /* Arch knows where to place */
-       if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN)
+       if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) {
+               pr_info("kexec_locate_mem_hole: memory already specified (0x%lx), skipping allocation\n", kbuf->mem);
+               return 0;
+       }
+
+       /*
+        * If this is a multikernel image, try to allocate from the instance's
+        * memory pool first. This ensures multikernel segments use pre-reserved
+        * memory from the device tree configuration and respects the pool management.
+        */
+       if (!kexec_alloc_multikernel(kbuf))
                return 0;
 
        /*
-- 
2.34.1

