On 08/21/25 at 04:53am, Andrew Morton wrote:
> On Thu, 21 Aug 2025 16:33:26 +0800 Baoquan He <b...@redhat.com> wrote:
> 
> > On 08/20/25 at 09:47pm, Andrew Morton wrote:
> > > On Tue, 5 Aug 2025 14:15:26 -0700 Brian Mak <m...@juniper.net> wrote:
......snip.....
> ---
> 
>  include/linux/kexec.h |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> --- a/include/linux/kexec.h~kexec-add-kexec_file_no_cma-as-a-legal-flag
> +++ a/include/linux/kexec.h
> @@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_imag
>  
>  /* List of defined/legal kexec file flags */
>  #define KEXEC_FILE_FLAGS     (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
> -                              KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG)
> +                              KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
> +                              KEXEC_FILE_NO_CMA)
>  
>  /* flag to track if kexec reboot is in progress */
>  extern bool kexec_in_progress;

Yeah, this is a good catch and a great fix. Without this fix, the
kexec_file_load syscall will fail and return '-EINVAL' when
KEXEC_FILE_NO_CMA is specified, just as the code below shows. So, for
this patch,

Acked-by: Baoquan He <b...@redhat.com>


And, by the way, has the user space kexec-tools change been merged to
allow KEXEC_FILE_NO_CMA to be specified?

And, Alexander, I am wondering why this was not caught when you tested
the case of specifying KEXEC_FILE_NO_CMA. Or did you just skip testing
the no_cma case?

===================================================================
SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, 
                unsigned long, cmdline_len, const char __user *, cmdline_ptr,
                unsigned long, flags)
{               
        int image_type = (flags & KEXEC_FILE_ON_CRASH) ?
                         KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT;
        struct kimage **dest_image, *image;
        int ret = 0, i;

        /* We only trust the superuser with rebooting the system. */
        if (!kexec_load_permitted(image_type))
                return -EPERM;

        /* Make sure we have a legal set of flags */
        if (flags != (flags & KEXEC_FILE_FLAGS))
                return -EINVAL;
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        ......
}
=====================================================

> _
> 
> 
> and the second patch I placed in mm-unstable:
> 
> From: Brian Mak <m...@juniper.net>
> Subject: x86/kexec: carry forward the boot DTB on kexec
> Date: Tue, 5 Aug 2025 14:15:27 -0700
> 
> Currently, the kexec_file_load syscall on x86 does not support passing a
> device tree blob to the new kernel.  Some embedded x86 systems use device
> trees.  On these systems, failing to pass a device tree to the new kernel
> causes a boot failure.
> 
> To add support for this, we copy the behavior of ARM64 and PowerPC and
> copy the current boot's device tree blob for use in the new kernel.  We do
> this on x86 by passing the device tree blob as a setup_data entry in
> accordance with the x86 boot protocol.
> 
> This behavior is gated behind the KEXEC_FILE_FORCE_DTB flag.
> 
> Link: https://lkml.kernel.org/r/20250805211527.122367-3-m...@juniper.net
> Signed-off-by: Brian Mak <m...@juniper.net>
> Cc: Alexander Graf <g...@amazon.com>
> Cc: Baoquan He <b...@redhat.com>
> Cc: Borislav Petkov <b...@alien8.de>
> Cc: Dave Young <dyo...@redhat.com>
> Cc: "H. Peter Anvin" <h...@zytor.com>
> Cc: Ingo Molnar <mi...@redhat.com>
> Cc: Rob Herring <r...@kernel.org>
> Cc: Saravana Kannan <sarava...@google.com>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Signed-off-by: Andrew Morton <a...@linux-foundation.org>
> ---
> 
>  arch/x86/kernel/kexec-bzimage64.c |   47 ++++++++++++++++++++++++++--
>  include/linux/kexec.h             |    5 ++
>  include/uapi/linux/kexec.h        |    4 ++
>  kernel/kexec_file.c               |    1 
>  4 files changed, 53 insertions(+), 4 deletions(-)
> 
> --- a/arch/x86/kernel/kexec-bzimage64.c~x86-kexec-carry-forward-the-boot-dtb-on-kexec
> +++ a/arch/x86/kernel/kexec-bzimage64.c
> @@ -16,6 +16,8 @@
>  #include <linux/kexec.h>
>  #include <linux/kernel.h>
>  #include <linux/mm.h>
> +#include <linux/libfdt.h>
> +#include <linux/of_fdt.h>
>  #include <linux/efi.h>
>  #include <linux/random.h>
>  
> @@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *para
>  }
>  #endif /* CONFIG_EFI */
>  
> +#ifdef CONFIG_OF_FLATTREE
> +static void setup_dtb(struct boot_params *params,
> +                   unsigned long params_load_addr,
> +                   unsigned int dtb_setup_data_offset)
> +{
> +     struct setup_data *sd = (void *)params + dtb_setup_data_offset;
> +     unsigned long setup_data_phys, dtb_len;
> +
> +     dtb_len = fdt_totalsize(initial_boot_params);
> +     sd->type = SETUP_DTB;
> +     sd->len = dtb_len;
> +
> +     /* Carry over current boot DTB with setup_data */
> +     memcpy(sd->data, initial_boot_params, dtb_len);
> +
> +     /* Add setup data */
> +     setup_data_phys = params_load_addr + dtb_setup_data_offset;
> +     sd->next = params->hdr.setup_data;
> +     params->hdr.setup_data = setup_data_phys;
> +}
> +#endif /* CONFIG_OF_FLATTREE */
> +
>  static void
>  setup_ima_state(const struct kimage *image, struct boot_params *params,
>               unsigned long params_load_addr,
> @@ -336,6 +360,17 @@ setup_boot_parameters(struct kimage *ima
>                       sizeof(struct efi_setup_data);
>  #endif
>  
> +#ifdef CONFIG_OF_FLATTREE
> +     if (image->force_dtb && initial_boot_params) {
> +             setup_dtb(params, params_load_addr, setup_data_offset);
> +             setup_data_offset += sizeof(struct setup_data) +
> +                                  fdt_totalsize(initial_boot_params);
> +     } else {
> +             pr_debug("Not carrying over DTB, force_dtb = %d\n",
> +                      image->force_dtb);
> +     }
> +#endif
> +
>       if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
>               /* Setup IMA log buffer state */
>               setup_ima_state(image, params, params_load_addr,
> @@ -529,6 +564,12 @@ static void *bzImage64_load(struct kimag
>                               sizeof(struct setup_data) +
>                               RNG_SEED_LENGTH;
>  
> +#ifdef CONFIG_OF_FLATTREE
> +     if (image->force_dtb && initial_boot_params)
> +             kbuf.bufsz += sizeof(struct setup_data) +
> +                           fdt_totalsize(initial_boot_params);
> +#endif
> +
>       if (IS_ENABLED(CONFIG_IMA_KEXEC))
>               kbuf.bufsz += sizeof(struct setup_data) +
>                             sizeof(struct ima_setup_data);
> @@ -537,7 +578,7 @@ static void *bzImage64_load(struct kimag
>               kbuf.bufsz += sizeof(struct setup_data) +
>                             sizeof(struct kho_data);
>  
> -     params = kzalloc(kbuf.bufsz, GFP_KERNEL);
> +     params = kvzalloc(kbuf.bufsz, GFP_KERNEL);

I am wondering how big the DTB blob is. Can you explain a little bit
about the kvzalloc usage here?

Other than this, I have no other concerns about this patch.

And what's your plan for the user space kexec-tools change?

>       if (!params)
>               return ERR_PTR(-ENOMEM);
>       efi_map_offset = params_cmdline_sz;
> @@ -647,7 +688,7 @@ static void *bzImage64_load(struct kimag
>       return ldata;
>  
>  out_free_params:
> -     kfree(params);
> +     kvfree(params);
>       return ERR_PTR(ret);
>  }
>  
> @@ -659,7 +700,7 @@ static int bzImage64_cleanup(void *loade
>       if (!ldata)
>               return 0;
>  
> -     kfree(ldata->bootparams_buf);
> +     kvfree(ldata->bootparams_buf);
>       ldata->bootparams_buf = NULL;
>  
>       return 0;
> --- a/include/linux/kexec.h~x86-kexec-carry-forward-the-boot-dtb-on-kexec
> +++ a/include/linux/kexec.h
> @@ -395,6 +395,9 @@ struct kimage {
>  
>       /* Information for loading purgatory */
>       struct purgatory_info purgatory_info;
> +
> +     /* Force carrying over the DTB from the current boot */
> +     bool force_dtb;
>  #endif
>  
>  #ifdef CONFIG_CRASH_HOTPLUG
> @@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_imag
>  /* List of defined/legal kexec file flags */
>  #define KEXEC_FILE_FLAGS     (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
>                                KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
> -                              KEXEC_FILE_NO_CMA)
> +                              KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB)
>  
>  /* flag to track if kexec reboot is in progress */
>  extern bool kexec_in_progress;
> --- a/include/uapi/linux/kexec.h~x86-kexec-carry-forward-the-boot-dtb-on-kexec
> +++ a/include/uapi/linux/kexec.h
> @@ -22,12 +22,16 @@
>   * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
>   * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
>   *                           fd field.
> + * KEXEC_FILE_FORCE_DTB : Force carrying over the current boot's DTB to the new
> + *                        kernel on x86. This is already the default behavior on
> + *                        some other architectures, like ARM64 and PowerPC.
>   */
>  #define KEXEC_FILE_UNLOAD    0x00000001
>  #define KEXEC_FILE_ON_CRASH  0x00000002
>  #define KEXEC_FILE_NO_INITRAMFS      0x00000004
>  #define KEXEC_FILE_DEBUG     0x00000008
>  #define KEXEC_FILE_NO_CMA    0x00000010
> +#define KEXEC_FILE_FORCE_DTB 0x00000020
>  
>  /* These values match the ELF architecture values.
>   * Unless there is a good reason that should continue to be the case.
> --- a/kernel/kexec_file.c~x86-kexec-carry-forward-the-boot-dtb-on-kexec
> +++ a/kernel/kexec_file.c
> @@ -255,6 +255,7 @@ kimage_file_prepare_segments(struct kima
>       }
>  
>       image->no_cma = !!(flags & KEXEC_FILE_NO_CMA);
> +     image->force_dtb = flags & KEXEC_FILE_FORCE_DTB;
>  
>       if (cmdline_len) {
>               image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len);
> _
> 


Reply via email to