ZONE_DEVICE pages have a struct dev_pagemap which is allocated by a
driver. When the struct page is first allocated by the kernel in
memremap_pages() a reference is taken on the associated pagemap to
ensure it is not freed prior to the pages being freed.
Prior to 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page
refcount") pages were considered free and returned to the driver when
the reference count dropped to one. However the pagemap reference was
not dropped until the page reference count hit zero. This would occur as
part of the final put_page() in memunmap_pages() which would wait for
all pages to be freed prior to returning.
When the extra refcount was removed the pagemap reference was no longer
being dropped in put_page(). Instead memunmap_pages() was changed to
explicitly drop the pagemap references. This means that memunmap_pages()
can complete even though pages are still mapped by the kernel which can
lead to kernel crashes, particularly if a driver frees the pagemap.
To fix this drivers should take a pagemap reference when allocating the
page. This reference can then be returned when the page is freed.
Signed-off-by: Alistair Popple <[email protected]>
Fixes: 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page refcount")
Cc: Jason Gunthorpe <[email protected]>
Cc: Felix Kuehling <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Cc: Ben Skeggs <[email protected]>
Cc: Lyude Paul <[email protected]>
Cc: Ralph Campbell <[email protected]>
Cc: Alex Sierra <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Dan Williams <[email protected]>
---
Again I expect this will conflict with Dan's series. This implements the
first suggestion from Jason at
https://lore.kernel.org/linux-mm/[email protected]/ so
whatever we end up doing for DAX we should do the same here.
---
mm/memremap.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/mm/memremap.c b/mm/memremap.c
index 1c2c038..421bec3 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -138,8 +138,11 @@ void memunmap_pages(struct dev_pagemap *pgmap)
int i;
percpu_ref_kill(&pgmap->ref);
- for (i = 0; i < pgmap->nr_range; i++)
- percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
+ if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
+ pgmap->type != MEMORY_DEVICE_COHERENT)
+ for (i = 0; i < pgmap->nr_range; i++)
+ percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
+
wait_for_completion(&pgmap->done);
for (i = 0; i < pgmap->nr_range; i++)
@@ -264,7 +267,9 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct
mhp_params *params,
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), pgmap);
- percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
+ if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
+ pgmap->type != MEMORY_DEVICE_COHERENT)
+ percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
return 0;
err_add_memory:
@@ -502,16 +507,24 @@ void free_zone_device_page(struct page *page)
page->mapping = NULL;
page->pgmap->ops->page_free(page);
- /*
- * Reset the page count to 1 to prepare for handing out the page again.
- */
if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
page->pgmap->type != MEMORY_DEVICE_COHERENT)
+ /*
+ * Reset the page count to 1 to prepare for handing out the page
+ * again.
+ */
set_page_count(page, 1);
+ else
+ put_dev_pagemap(page->pgmap);
}
void zone_device_page_init(struct page *page)
{
+ /*
+ * Drivers shouldn't be allocating pages after calling
+ * memunmap_pages().
+ */
+ WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
set_page_count(page, 1);
lock_page(page);
}
--
git-series 0.9.1