Hi Mike, On Wed, Sep 17 2025, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <[email protected]> > > to make it clear that KHO operates on pages rather than on a random > physical address. > > The kho_preserve_pages() will be also used in upcoming support for > vmalloc preservation. > > Signed-off-by: Mike Rapoport (Microsoft) <[email protected]> > --- > include/linux/kexec_handover.h | 5 +++-- > kernel/kexec_handover.c | 25 +++++++++++-------------- > mm/memblock.c | 4 +++- > 3 files changed, 17 insertions(+), 17 deletions(-) > > diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h > index 348844cffb13..cc5c49b0612b 100644 > --- a/include/linux/kexec_handover.h > +++ b/include/linux/kexec_handover.h > @@ -18,6 +18,7 @@ enum kho_event { > > struct folio; > struct notifier_block; > +struct page; > > #define DECLARE_KHOSER_PTR(name, type) \ > union { \ > @@ -42,7 +43,7 @@ struct kho_serialization; > bool kho_is_enabled(void); > > int kho_preserve_folio(struct folio *folio); > -int kho_preserve_phys(phys_addr_t phys, size_t size); > +int kho_preserve_pages(struct page *page, unsigned int nr_pages); > struct folio *kho_restore_folio(phys_addr_t phys); > int kho_add_subtree(struct kho_serialization *ser, const char *name, void > *fdt); > int kho_retrieve_subtree(const char *name, phys_addr_t *phys); > @@ -65,7 +66,7 @@ static inline int kho_preserve_folio(struct folio *folio) > return -EOPNOTSUPP; > } > > -static inline int kho_preserve_phys(phys_addr_t phys, size_t size) > +static inline int kho_preserve_pages(struct page *page, unsigned int > nr_pages) > { > return -EOPNOTSUPP; > } > diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c > index f421acc58c1f..3ad59c5f9eaa 100644 > --- a/kernel/kexec_handover.c > +++ b/kernel/kexec_handover.c > @@ -698,26 +698,23 @@ int kho_preserve_folio(struct folio *folio) > EXPORT_SYMBOL_GPL(kho_preserve_folio); > > /** > - * kho_preserve_phys - preserve a physically contiguous range across kexec. > - * @phys: physical address of the range. > - * @size: size of the range. > + * kho_preserve_pages - preserve contiguous pages across kexec > + * @page: first page in the list. > + * @nr_pages: number of pages. > * > - * Instructs KHO to preserve the memory range from @phys to @phys + @size > - * across kexec. > + * Preserve a contiguous list of order 0 pages. Must be restored using > + * kho_restore_page() on each order 0 page. This is not true. The pages are preserved with the maximum order possible. while (pfn < end_pfn) { const unsigned int order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); err = __kho_preserve_order(track, pfn, order); [...] So four 0-order pages will be preserved as one 2-order page. Restoring them as four 0-order pages is wrong. And my proposed patch for checking the magic [0] will uncover this exact bug. I think you should either change the logic to always preserve at order 0, or maybe add a kho_restore_pages() that replicates the same order calculation. [0] https://lore.kernel.org/lkml/[email protected]/ > * > * Return: 0 on success, error code on failure > */ > -int kho_preserve_phys(phys_addr_t phys, size_t size) > +int kho_preserve_pages(struct page *page, unsigned int nr_pages) > { > - unsigned long pfn = PHYS_PFN(phys); > + struct kho_mem_track *track = &kho_out.ser.track; > + const unsigned long start_pfn = page_to_pfn(page); > + const unsigned long end_pfn = start_pfn + nr_pages; > + unsigned long pfn = start_pfn; > unsigned long failed_pfn = 0; > - const unsigned long start_pfn = pfn; > - const unsigned long end_pfn = PHYS_PFN(phys + size); > int err = 0; > - struct kho_mem_track *track = &kho_out.ser.track; > - > - if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) > - return -EINVAL; > > while (pfn < end_pfn) { > const unsigned int order = > @@ -737,7 +734,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size) > > return err; > } > -EXPORT_SYMBOL_GPL(kho_preserve_phys); > +EXPORT_SYMBOL_GPL(kho_preserve_pages); > > /* Handling for debug/kho/out */ > > diff --git a/mm/memblock.c b/mm/memblock.c > index 117d963e677c..6ec3eaa4e8d1 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct > kho_serialization *ser) > > for (i = 0; i < reserved_mem_count; i++) { > struct reserve_mem_table *map = &reserved_mem_table[i]; > + struct page *page = phys_to_page(map->start); > + unsigned int nr_pages = map->size >> PAGE_SHIFT; > > - err |= kho_preserve_phys(map->start, map->size); > + err |= kho_preserve_pages(page, nr_pages); Unrelated to this patch, but since there is no kho_restore_{phys,pages}(), won't the reserve_mem memory end up with uninitialized struct pages, since preserved pages are memblock_reserved_mark_noinit()? That would also be a case for kho_restore_pages() I suppose? -- Regards, Pratyush Yadav
