Historically when freeing pages, free_one_page() assumed that callers
had IRQs disabled and the zone->lock could be acquired with spin_lock().
This confuses the scope of what local_lock_irq is protecting and what
zone->lock is protecting in free_unref_page_list() in particular.

This patch uses spin_lock_irqsave() for the zone->lock in
free_one_page() instead of relying on callers to have disabled
IRQs. free_unref_page_commit() is changed to only deal with PCP pages
protected by the local lock. free_unref_page_list() then first frees
isolated pages to the buddy lists with free_one_page() and frees the rest
of the pages to the PCP via free_unref_page_commit(). The end result
is that free_one_page() no longer depends on the side-effects of
local_lock for correctness.
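
Roughly, the resulting free_unref_page_list() takes the following shape
(a simplified sketch; the exact pfn/tracing bookkeeping and the IRQ
batching guard are in the hunks below):

        /* Phase 1: no local lock held, isolated pages go straight to buddy */
        list_for_each_entry_safe(page, next, list, lru) {
                pfn = page_to_pfn(page);
                if (!free_unref_page_prepare(page, pfn))
                        list_del(&page->lru);
                set_page_private(page, pfn);

                if (unlikely(is_migrate_isolate(get_pcppage_migratetype(page)))) {
                        /* free_one_page() now IRQ-saves around zone->lock itself */
                        free_one_page(page_zone(page), page, pfn, 0,
                                      get_pcppage_migratetype(page), FPI_NONE);
                        list_del(&page->lru);
                }
        }

        /* Phase 2: local_lock only covers the per-CPU list manipulation */
        local_lock_irqsave(&pagesets.lock, flags);
        list_for_each_entry_safe(page, next, list, lru)
                free_unref_page_commit(page, page_private(page),
                                       get_pcppage_migratetype(page));
        local_unlock_irqrestore(&pagesets.lock, flags);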

Note that this may incur a performance penalty while memory hot-remove
is running, but that is not a common operation.

Signed-off-by: Mel Gorman <mgor...@techsingularity.net>
---
 mm/page_alloc.c | 67 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6791e9361076..a0b210077178 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1473,10 +1473,12 @@ static void free_one_page(struct zone *zone,
                                unsigned int order,
                                int migratetype, fpi_t fpi_flags)
 {
-       spin_lock(&zone->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&zone->lock, flags);
        migratetype = check_migratetype_isolated(zone, page, pfn, migratetype);
        __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
-       spin_unlock(&zone->lock);
+       spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -3238,31 +3240,13 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
        return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+                                  int migratetype)
 {
        struct zone *zone = page_zone(page);
        struct per_cpu_pages *pcp;
-       int migratetype;
 
-       migratetype = get_pcppage_migratetype(page);
        __count_vm_event(PGFREE);
-
-       /*
-        * We only track unmovable, reclaimable and movable on pcp lists.
-        * Free ISOLATE pages back to the allocator because they are being
-        * offlined but treat HIGHATOMIC as movable pages so we can get those
-        * areas back if necessary. Otherwise, we may have to free
-        * excessively into the page allocator
-        */
-       if (migratetype >= MIGRATE_PCPTYPES) {
-               if (unlikely(is_migrate_isolate(migratetype))) {
-                       free_one_page(zone, page, pfn, 0, migratetype,
-                                     FPI_NONE);
-                       return;
-               }
-               migratetype = MIGRATE_MOVABLE;
-       }
-
        pcp = this_cpu_ptr(zone->per_cpu_pageset);
        list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
@@ -3277,12 +3261,29 @@ void free_unref_page(struct page *page)
 {
        unsigned long flags;
        unsigned long pfn = page_to_pfn(page);
+       int migratetype;
 
        if (!free_unref_page_prepare(page, pfn))
                return;
 
+       /*
+        * We only track unmovable, reclaimable and movable on pcp lists.
+        * Place ISOLATE pages on the isolated list because they are being
+        * offlined but treat HIGHATOMIC as movable pages so we can get those
+        * areas back if necessary. Otherwise, we may have to free
+        * excessively into the page allocator
+        */
+       migratetype = get_pcppage_migratetype(page);
+       if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+               if (unlikely(is_migrate_isolate(migratetype))) {
+                       free_one_page(page_zone(page), page, pfn, 0,
+                                     migratetype, FPI_NONE);
+                       return;
+               }
+               migratetype = MIGRATE_MOVABLE;
+       }
+
        local_lock_irqsave(&pagesets.lock, flags);
-       free_unref_page_commit(page, pfn);
+       free_unref_page_commit(page, pfn, migratetype);
        local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
@@ -3294,6 +3295,7 @@ void free_unref_page_list(struct list_head *list)
        struct page *page, *next;
        unsigned long flags, pfn;
        int batch_count = 0;
+       int migratetype;
 
        /* Prepare pages for freeing */
        list_for_each_entry_safe(page, next, list, lru) {
@@ -3301,15 +3303,28 @@ void free_unref_page_list(struct list_head *list)
                if (!free_unref_page_prepare(page, pfn))
                        list_del(&page->lru);
                set_page_private(page, pfn);
+
+               /*
+                * Free isolated pages directly to the allocator, see
+                * comment in free_unref_page.
+                */
+               migratetype = get_pcppage_migratetype(page);
+               if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+                       if (unlikely(is_migrate_isolate(migratetype))) {
+                               free_one_page(page_zone(page), page, pfn, 0,
+                                                       migratetype, FPI_NONE);
+                               list_del(&page->lru);
+                       }
+               }
        }
 
        local_lock_irqsave(&pagesets.lock, flags);
        list_for_each_entry_safe(page, next, list, lru) {
-               unsigned long pfn = page_private(page);
-
+               pfn = page_private(page);
                set_page_private(page, 0);
+               migratetype = get_pcppage_migratetype(page);
                trace_mm_page_free_batched(page);
-               free_unref_page_commit(page, pfn);
+               free_unref_page_commit(page, pfn, migratetype);
 
                /*
                 * Guard against excessive IRQ disabled times when we get
-- 
2.26.2
