On Fri, Nov 14 2025, Pasha Tatashin wrote:

> Currently, clients of KHO must manually allocate memory (e.g., via
> alloc_pages), calculate the page order, and explicitly call
> kho_preserve_folio(). Similarly, cleanup requires separate calls to
> unpreserve and free the memory.
>
> Introduce a high-level API to streamline this common pattern:
>
> - kho_alloc_preserve(size): Allocates physically contiguous, zeroed
>   memory and immediately marks it for preservation.
> - kho_free_unpreserve(ptr, size): Unpreserves and frees the memory
>   in the current kernel.
> - kho_free_restore(ptr, size): Restores the struct page state of
>   preserved memory in the new kernel and immediately frees it to the
>   page allocator.

Nit: kho_unpreserve_free() and kho_restore_free() make more sense to me
since that is the order of operations. Having them the other way round
is kind of confusing.

Also, why do the free functions need size? They can get the order from
folio_order(). This would save users of the API from having to store
the size somewhere and make things simpler.
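IOW, something like this (just a sketch of the suggested shape, with
the names from the nit above):

	void *kho_alloc_preserve(size_t size);
	void kho_unpreserve_free(void *mem);
	void kho_restore_free(void *mem);

Since the allocation is always a single folio, the free side can
recover everything it needs from the folio itself.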
>
> Signed-off-by: Pasha Tatashin <[email protected]>
> ---
>  include/linux/kexec_handover.h     |  22 +++++--
>  kernel/liveupdate/kexec_handover.c | 101 +++++++++++++++++++++++++++++
>  2 files changed, 116 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
> index 80ece4232617..76c496e01877 100644
> --- a/include/linux/kexec_handover.h
> +++ b/include/linux/kexec_handover.h
> @@ -2,8 +2,9 @@
>  #ifndef LINUX_KEXEC_HANDOVER_H
>  #define LINUX_KEXEC_HANDOVER_H
>
> -#include <linux/types.h>
> +#include <linux/err.h>
>  #include <linux/errno.h>
> +#include <linux/types.h>
>
>  struct kho_scratch {
>  	phys_addr_t addr;
> @@ -48,6 +49,9 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages);
>  int kho_unpreserve_pages(struct page *page, unsigned int nr_pages);
>  int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
>  int kho_unpreserve_vmalloc(struct kho_vmalloc *preservation);
> +void *kho_alloc_preserve(size_t size);
> +void kho_free_unpreserve(void *mem, size_t size);
> +void kho_free_restore(void *mem, size_t size);
>  struct folio *kho_restore_folio(phys_addr_t phys);
>  struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
>  void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
> @@ -101,6 +105,14 @@ static inline int kho_unpreserve_vmalloc(struct kho_vmalloc *preservation)
>  	return -EOPNOTSUPP;
>  }
>
> +void *kho_alloc_preserve(size_t size)
> +{
> +	return ERR_PTR(-EOPNOTSUPP);
> +}
> +
> +void kho_free_unpreserve(void *mem, size_t size) { }
> +void kho_free_restore(void *mem, size_t size) { }
> +
>  static inline struct folio *kho_restore_folio(phys_addr_t phys)
>  {
>  	return NULL;
> @@ -122,18 +134,14 @@ static inline int kho_add_subtree(const char *name, void *fdt)
>  	return -EOPNOTSUPP;
>  }
>
> -static inline void kho_remove_subtree(void *fdt)
> -{
> -}
> +static inline void kho_remove_subtree(void *fdt) { }
>
>  static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
>  {
>  	return -EOPNOTSUPP;
>  }
>
> -static inline void kho_memory_init(void)
> -{
> -}
> +static inline void kho_memory_init(void) { }
>
>  static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  				phys_addr_t scratch_phys, u64 scratch_len)
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index a905bccf5f65..9f05849fd68e 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -4,6 +4,7 @@
>   * Copyright (C) 2023 Alexander Graf <[email protected]>
>   * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <[email protected]>
>   * Copyright (C) 2025 Google LLC, Changyuan Lyu <[email protected]>
> + * Copyright (C) 2025 Pasha Tatashin <[email protected]>
>   */
>
>  #define pr_fmt(fmt) "KHO: " fmt
> @@ -1151,6 +1152,106 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
>  }
>  EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
>
> +/**
> + * kho_alloc_preserve - Allocate, zero, and preserve memory.
> + * @size: The number of bytes to allocate.
> + *
> + * Allocates a physically contiguous block of zeroed pages that is large
> + * enough to hold @size bytes. The allocated memory is then registered with
> + * KHO for preservation across a kexec.
> + *
> + * Note: The actual allocated size will be rounded up to the nearest
> + * power-of-two page boundary.
> + *
> + * @return A virtual pointer to the allocated and preserved memory on success,
> + * or an ERR_PTR() encoded error on failure.
> + */
> +void *kho_alloc_preserve(size_t size)
> +{
> +	struct folio *folio;
> +	int order, ret;
> +
> +	if (!size)
> +		return ERR_PTR(-EINVAL);
> +
> +	order = get_order(size);
> +	if (order > MAX_PAGE_ORDER)
> +		return ERR_PTR(-E2BIG);
> +
> +	folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
> +	if (!folio)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ret = kho_preserve_folio(folio);
> +	if (ret) {
> +		folio_put(folio);
> +		return ERR_PTR(ret);
> +	}
> +
> +	return folio_address(folio);
> +}
> +EXPORT_SYMBOL_GPL(kho_alloc_preserve);
> +
> +/**
> + * kho_free_unpreserve - Unpreserve and free memory.
> + * @mem: Pointer to the memory allocated by kho_alloc_preserve().
> + * @size: The original size requested during allocation. This is used to
> + *        recalculate the correct order for freeing the pages.
> + *
> + * Unregisters the memory from KHO preservation and frees the underlying
> + * pages back to the system. This function should be called to clean up
> + * memory allocated with kho_alloc_preserve().
> + */
> +void kho_free_unpreserve(void *mem, size_t size)
> +{
> +	struct folio *folio;
> +	unsigned int order;
> +
> +	if (!mem || !size)
> +		return;
> +
> +	order = get_order(size);
> +	if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
> +		return;
> +
> +	folio = virt_to_folio(mem);
> +	WARN_ON_ONCE(kho_unpreserve_folio(folio));

This is what I meant in my reply to the previous patch.
kho_unpreserve_folio() can be void now, so the WARN_ON_ONCE() is not
needed.
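With that, and with the size parameter dropped as suggested above, the
whole function reduces to something like this (untested):

	void kho_unpreserve_free(void *mem)
	{
		struct folio *folio;

		if (!mem)
			return;

		folio = virt_to_folio(mem);
		kho_unpreserve_folio(folio);	/* void, per the previous patch */
		folio_put(folio);		/* frees the whole folio, order and all */
	}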
> +	folio_put(folio);
> +}
> +EXPORT_SYMBOL_GPL(kho_free_unpreserve);
> +
> +/**
> + * kho_free_restore - Restore and free memory after kexec.
> + * @mem: Pointer to the memory (in the new kernel's address space)
> + *       that was allocated by the old kernel.
> + * @size: The original size requested during allocation. This is used to
> + *        recalculate the correct order for freeing the pages.
> + *
> + * This function is intended to be called in the new kernel (post-kexec)
> + * to take ownership of and free a memory region that was preserved by the
> + * old kernel using kho_alloc_preserve().
> + *
> + * It first restores the pages from KHO (using their physical address)
> + * and then frees the pages back to the new kernel's page allocator.
> + */
> +void kho_free_restore(void *mem, size_t size)

On the restore side, callers are already using the phys addr directly.
So do kho_restore_folio() and kho_restore_pages(), for example. This
should follow suit for uniformity. It would also save the callers a
__va() call and this function the __pa() call.

> +{
> +	struct folio *folio;
> +	unsigned int order;
> +
> +	if (!mem || !size)
> +		return;
> +
> +	order = get_order(size);
> +	if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
> +		return;
> +
> +	folio = kho_restore_folio(__pa(mem));
> +	if (!WARN_ON(!folio))

kho_restore_folio() already WARNs on failure, so the WARN_ON() here can
be skipped, I think.

> +		free_pages((unsigned long)mem, order);

folio_put() here makes more sense since we just restored a folio.
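Putting these together, the restore side could be as simple as this
(untested, taking the phys addr directly as suggested above):

	void kho_restore_free(phys_addr_t phys)
	{
		/* kho_restore_folio() already WARNs on failure. */
		struct folio *folio = kho_restore_folio(phys);

		if (folio)
			folio_put(folio);
	}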
> +}
> +EXPORT_SYMBOL_GPL(kho_free_restore);
> +
>  int kho_finalize(void)
>  {
>  	int ret;

-- 
Regards,
Pratyush Yadav