Hi Pingfan,

On Mon, 19 Jan 2026 11:24:15 +0800
Pingfan Liu <[email protected]> wrote:

[...]

> diff --git a/kernel/kexec_bpf_loader.c b/kernel/kexec_bpf_loader.c
> new file mode 100644
> index 0000000000000..dc59e1389da94
> --- /dev/null
> +++ b/kernel/kexec_bpf_loader.c
> @@ -0,0 +1,161 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Kexec image bpf section helpers
> + *
> + * Copyright (C) 2025, 2026 Red Hat, Inc
> + */
> +
> +#define pr_fmt(fmt)  "kexec_file(Image): " fmt
> +
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/vmalloc.h>
> +#include <linux/kexec.h>
> +#include <linux/elf.h>
> +#include <linux/string.h>
> +#include <linux/bpf.h>
> +#include <linux/filter.h>
> +#include <asm/byteorder.h>
> +#include <asm/image.h>
> +#include <asm/memory.h>
> +#include "kexec_internal.h"
> +
> +/* Load a ELF */
> +static int arm_bpf_prog(char *bpf_elf, unsigned long sz)
> +{
> +     return 0;
> +}
> +
> +static void disarm_bpf_prog(void)
> +{
> +}
> +
> +struct kexec_context {
> +     bool kdump;
> +     char *kernel;
> +     int kernel_sz;
> +     char *initrd;
> +     int initrd_sz;
> +     char *cmdline;
> +     int cmdline_sz;
> +};
> +
> +void kexec_image_parser_anchor(struct kexec_context *context,
> +             unsigned long parser_id);
> +
> +/*
> + * optimize("O0") prevents inline, compiler constant propagation
> + *
> + * Let bpf be the program context pointer so that it will not be spilled into
> + * stack.
> + */
> +__attribute__((used, optimize("O0"))) void kexec_image_parser_anchor(
> +             struct kexec_context *context,
> +             unsigned long parser_id)
> +{
> +     /*
> +      * To prevent linker from Identical Code Folding (ICF) with 
> kexec_image_parser_anchor,
> +      * making them have different code.
> +      */
> +     volatile int dummy = 0;
> +
> +     dummy += 1;
> +}
> +
> +
> +BTF_KFUNCS_START(kexec_modify_return_ids)
> +BTF_ID_FLAGS(func, kexec_image_parser_anchor, KF_SLEEPABLE)
> +BTF_KFUNCS_END(kexec_modify_return_ids)
> +
> +static const struct btf_kfunc_id_set kexec_modify_return_set = {
> +     .owner = THIS_MODULE,
> +     .set = &kexec_modify_return_ids,
> +};
> +
> +static int __init kexec_bpf_prog_run_init(void)
> +{
> +     return register_btf_fmodret_id_set(&kexec_modify_return_set);
> +}
> +late_initcall(kexec_bpf_prog_run_init);
> +
> +static int kexec_buff_parser(struct bpf_parser_context *parser)
> +{
> +     return 0;
> +}
> +
> +/* At present, only PE format file with .bpf section is supported */
> +#define file_has_bpf_section pe_has_bpf_section
> +#define file_get_section     pe_get_section
> +
> +int decompose_kexec_image(struct kimage *image, int extended_fd)
> +{
> +     struct kexec_context context = { 0 };
> +     struct bpf_parser_context *bpf;
> +     unsigned long kernel_sz, bpf_sz;
> +     char *kernel_start, *bpf_start;
> +     int ret = 0;
> +
> +     if (image->type != KEXEC_TYPE_CRASH)
> +             context.kdump = false;
> +     else
> +             context.kdump = true;
> +
> +     kernel_start = image->kernel_buf;
> +     kernel_sz = image->kernel_buf_len;
> +
> +     while (file_has_bpf_section(kernel_start, kernel_sz)) {
> +
> +             bpf = alloc_bpf_parser_context(kexec_buff_parser, &context);
> +             if (!bpf)
> +                     return -ENOMEM;
> +             file_get_section((const char *)kernel_start, ".bpf", 
> &bpf_start, &bpf_sz);
> +             if (!!bpf_sz) {
> +                     /* load and attach bpf-prog */
> +                     ret = arm_bpf_prog(bpf_start, bpf_sz);
> +                     if (ret) {
> +                             put_bpf_parser_context(bpf);
> +                             pr_err("Fail to load .bpf section\n");
> +                             goto err;
> +                     }
> +             }

I'm not sure this works as intended. If a .bpf section exists but
bpf_sz is 0, the function skips arming the bpf-prog but still
continues. That doesn't look right to me. IIUC, a zero-sized bpf-prog
should be treated as an error. Or am I missing something?

Thanks
Philipp

> +             context.kernel = kernel_start;
> +             context.kernel_sz = kernel_sz;
> +             /* bpf-prog fentry, which handle above buffers. */
> +             kexec_image_parser_anchor(&context, (unsigned long)bpf);
> +
> +             /*
> +              * Container may be nested and should be unfold one by one.
> +              * The former bpf-prog should prepare 'kernel', 'initrd',
> +              * 'cmdline' for the next phase by calling kexec_buff_parser()
> +              */
> +             kernel_start = context.kernel;
> +             kernel_sz = context.kernel_sz;
> +
> +             /*
> +              * detach the current bpf-prog from their attachment points.
> +              */
> +             disarm_bpf_prog();
> +             put_bpf_parser_context(bpf);
> +     }
> +
> +     /*
> +      * image's kernel_buf, initrd_buf, cmdline_buf are set. Now they should
> +      * be updated to the new content.
> +      */
> +     image->kernel_buf = context.kernel;
> +     image->kernel_buf_len = context.kernel_sz;
> +     image->initrd_buf = context.initrd;
> +     image->initrd_buf_len = context.initrd_sz;
> +     image->cmdline_buf = context.cmdline;
> +     image->cmdline_buf_len = context.cmdline_sz;
> +
> +     return 0;
> +err:
> +     vfree(context.kernel);
> +     vfree(context.initrd);
> +     vfree(context.cmdline);
> +     return ret;
> +}
> +
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index 0222d17072d40..f9674bb5bd8db 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -238,7 +238,14 @@ kimage_file_prepare_segments(struct kimage *image, int 
> kernel_fd, int initrd_fd,
>               goto out;
>  #endif
>  
> -     /* Call arch image probe handlers */
> +     if (IS_ENABLED(CONFIG_KEXEC_BPF))
> +             decompose_kexec_image(image, initrd_fd);
> +
> +     /*
> +      * From this point, the kexec subsystem handle the kernel boot protocol.
> +      *
> +      * Call arch image probe handlers
> +      */
>       ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
>                                           image->kernel_buf_len);
>       if (ret)
> diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
> index 8e5e5c1237732..ee01d0c8bb377 100644
> --- a/kernel/kexec_internal.h
> +++ b/kernel/kexec_internal.h
> @@ -39,6 +39,7 @@ extern size_t kexec_purgatory_size;
>  extern bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz);
>  extern int pe_get_section(const char *file_buf, const char *sect_name,
>               char **sect_start, unsigned long *sect_sz);
> +extern int decompose_kexec_image(struct kimage *image, int extended_fd);
>  #else /* CONFIG_KEXEC_FILE */
>  static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
>  #endif /* CONFIG_KEXEC_FILE */

Reply via email to