[PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added
From: Michal Nazarewicz <min...@mina86.com>
Signed-off-by: Michal Nazarewicz <min...@mina86.com>
---
 include/linux/page-isolation.h |  4 ++-
 mm/page_alloc.c                | 66 ++++++++++++++++++++++++++++++++++------
 2 files changed, 60 insertions(+), 10 deletions(-)

On Fri, 2011-08-19 at 16:27 +0200, Marek Szyprowski wrote:
> +unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
> +				       gfp_t flag)
> +{
> +	unsigned long pfn = start, count;
> +	struct page *page;
> +	struct zone *zone;
> +	int order;
> +
> +	VM_BUG_ON(!pfn_valid(start));
> +	zone = page_zone(pfn_to_page(start));

On Thu, 08 Sep 2011 20:05:52 +0200, Dave Hansen <d...@linux.vnet.ibm.com> wrote:
> This implies that start-end are entirely contained in a single zone.
> What enforces that?  If some higher layer enforces that, I think we
> probably need at least a VM_BUG_ON() in here and a comment about who
> enforces it.

> +	spin_lock_irq(&zone->lock);
> +
> +	page = pfn_to_page(pfn);
> +	for (;;) {
> +		VM_BUG_ON(page_count(page) || !PageBuddy(page));
> +		list_del(&page->lru);
> +		order = page_order(page);
> +		zone->free_area[order].nr_free--;
> +		rmv_page_order(page);
> +		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
> +		pfn += 1 << order;
> +		if (pfn >= end)
> +			break;
> +		VM_BUG_ON(!pfn_valid(pfn));
> +		page += 1 << order;
> +	}

> This 'struct page *'++ stuff is OK, but only for small, aligned areas.
> For at least some of the sparsemem modes (non-VMEMMAP), you could walk
> off of the end of the section_mem_map[] when you cross a MAX_ORDER
> boundary.
>
> I'd feel a little bit more comfortable if pfn_to_page() was being done
> each time, or only occasionally when you cross a section boundary.

Do the attached changes seem to make sense?  I wanted to avoid calling
pfn_to_page() each time, as it seems fairly expensive in sparsemem and
discontig modes.  At the same time, the macro trickery is there so that
users of sparsemem-vmemmap and flatmem won't have to pay the price.

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index b2a81fd..003c52f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -46,11 +46,13 @@ static inline void unset_migratetype_isolate(struct page *page)
 {
 	__unset_migratetype_isolate(page, MIGRATE_MOVABLE);
 }
+
+/* The below functions must be run on a range from a single zone. */
 extern unsigned long alloc_contig_freed_pages(unsigned long start,
 					      unsigned long end, gfp_t flag);
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      gfp_t flags, unsigned migratetype);
-extern void free_contig_pages(struct page *page, int nr_pages);
+extern void free_contig_pages(unsigned long pfn, unsigned nr_pages);
 
 /*
  * For migration.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 46e78d4..32fda5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5716,9 +5716,41 @@ out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
+#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/*
+ * In FLATMEM and CONFIG_SPARSEMEM_VMEMMAP we can safely increment the page
+ * pointer and get the same value as if we were to get it by calling
+ * pfn_to_page() on the incremented pfn counter.
+ */
+#define __contig_next_page(page, pageblock_left, pfn, increment)	\
+	((page) + (increment))
+
+#define __contig_first_page(pageblock_left, pfn) pfn_to_page(pfn)
+
+#else
+
+/*
+ * If we cross a pageblock boundary, make sure we get a valid page pointer.
+ * If we are within a pageblock, incrementing the pointer is good enough, and
+ * is a bit of an optimisation.
+ */
+#define __contig_next_page(page, pageblock_left, pfn, increment)	\
+	(likely((pageblock_left) -= (increment))			\
+		? (page) + (increment)					\
+		: (((pageblock_left) = pageblock_nr_pages), pfn_to_page(pfn)))
+
+#define __contig_first_page(pageblock_left, pfn) (			\
+	((pageblock_left) = pageblock_nr_pages -			\
+		((pfn) & (pageblock_nr_pages - 1))),			\
+	pfn_to_page(pfn))
+
+#endif
+
 unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
 				       gfp_t flag)
 {
+	unsigned long pageblock_left __unused;
 	unsigned long pfn = start, count;
 	struct page *page;
 	struct zone *zone;
@@ -5729,27 +5761,37 @@ unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
 
 	spin_lock_irq(&zone->lock);
 
-	page = pfn_to_page(pfn);
+	page = __contig_first_page(pageblock_left, pfn);
 	for (;;) {
-		VM_BUG_ON(page_count(page) || !PageBuddy(page));
+		VM_BUG_ON(page_count(page) || !PageBuddy(page) ||
+			  page_zone(page) != zone);
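Since the macros take a moment to parse, here is a self-contained userspace sketch of the bookkeeping they implement. Everything in it is a toy stand-in rather than kernel API: the local struct page, pfn_to_page(), PAGEBLOCK_NR_PAGES, and the contig_* macros are all hypothetical, and the kernel-only likely() hint is dropped. The point it demonstrates is that the memmap is split into separately allocated chunks, plain pointer increments are used inside a pageblock, and pfn_to_page() is re-done exactly when the walk crosses a pageblock boundary.

/*
 * Toy model: two separately allocated memmap chunks, as in non-VMEMMAP
 * sparsemem, where pointer arithmetic across the chunk boundary is
 * undefined.  Chunk size is a multiple of the pageblock size, so every
 * chunk crossing coincides with a pageblock crossing.
 */
#include <assert.h>
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 8UL	/* toy stand-in for pageblock_nr_pages */
#define NR_PFNS            32UL

struct page { unsigned long pfn; };

static struct page memmap_lo[NR_PFNS / 2], memmap_hi[NR_PFNS / 2];

static struct page *pfn_to_page(unsigned long pfn)
{
	return pfn < NR_PFNS / 2 ? &memmap_lo[pfn]
				 : &memmap_hi[pfn - NR_PFNS / 2];
}

/* Same shape as the patch's macros, with toy names and no likely(). */
#define contig_first_page(pageblock_left, pfn) (			\
	((pageblock_left) = PAGEBLOCK_NR_PAGES -			\
		((pfn) & (PAGEBLOCK_NR_PAGES - 1))),			\
	pfn_to_page(pfn))

#define contig_next_page(page, pageblock_left, pfn, increment)		\
	(((pageblock_left) -= (increment))				\
		? (page) + (increment)					\
		: (((pageblock_left) = PAGEBLOCK_NR_PAGES), pfn_to_page(pfn)))

int main(void)
{
	unsigned long pageblock_left, pfn;
	struct page *page;

	for (pfn = 0; pfn < NR_PFNS; pfn++)
		pfn_to_page(pfn)->pfn = pfn;

	pfn = 3;				/* deliberately unaligned start */
	page = contig_first_page(pageblock_left, pfn);
	while (pfn < NR_PFNS - 1) {
		assert(page->pfn == pfn);	/* pointer stayed in sync */
		pfn += 1;			/* the patch steps by 1 << order */
		page = contig_next_page(page, pageblock_left, pfn, 1);
	}
	puts("pointer walk stayed valid across both memmap chunks");
	return 0;
}

Compiled and run, this prints the success message; replacing contig_next_page() with a bare page + 1 would instead step one past the end of memmap_lo at the chunk boundary, which is exactly the hazard Dave describes above.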
[PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added
From: Michal Nazarewicz <min...@mina86.com>
Signed-off-by: Michal Nazarewicz <min...@mina86.com>
---
 include/asm-generic/memory_model.h | 17 ++++++++++++
 include/linux/page-isolation.h     |  4 ++-
 mm/page_alloc.c                    | 43 +++++++++++++++++------------
 3 files changed, 53 insertions(+), 11 deletions(-)

On Wed, 2011-09-21 at 17:19 +0200, Michal Nazarewicz wrote:
> I wanted to avoid calling pfn_to_page() each time, as it seems fairly
> expensive in sparsemem and discontig modes.  At the same time, the
> macro trickery is there so that users of sparsemem-vmemmap and flatmem
> won't have to pay the price.

On Wed, 21 Sep 2011 17:45:59 +0200, Dave Hansen <d...@linux.vnet.ibm.com> wrote:
> Personally, I'd say the (incredibly minuscule) runtime cost is worth
> the cost of making folks' eyes bleed when they see those macros.  I
> think there are some nicer ways to do it.

Yeah.  I wasn't amazed by them either.

> Is there a reason you can't logically do?
>
>	page = pfn_to_page(pfn);
>	for (;;) {
>		if (pfn_to_section_nr(pfn) == pfn_to_section_nr(pfn + 1))
>			page++;
>		else
>			page = pfn_to_page(pfn + 1);
>	}

Done.  Thanks for the suggestions!

> +#define __contig_next_page(page, pageblock_left, pfn, increment)	\
> +	(likely((pageblock_left) -= (increment))			\
> +		? (page) + (increment)					\
> +		: (((pageblock_left) = pageblock_nr_pages), pfn_to_page(pfn)))
> +
> +#define __contig_first_page(pageblock_left, pfn) (			\
> +	((pageblock_left) = pageblock_nr_pages -			\
> +		((pfn) & (pageblock_nr_pages - 1))),			\
> +	pfn_to_page(pfn))
> +
> +#endif

> For the love of Pete, please make those into functions if you're going
> to keep them.

That was tricky because they modify pageblock_left.  Not relevant now
anyway, though.

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index fb2d63f..900da88 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -69,6 +69,23 @@
 })
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/*
+ * Both PFNs must be from the same zone!  If this function returns
+ * true, pfn_to_page(pfn1) + (pfn2 - pfn1) == pfn_to_page(pfn2).
+ */
+static inline bool zone_pfn_same_memmap(unsigned long pfn1, unsigned long pfn2)
+{
+	return pfn_to_section_nr(pfn1) == pfn_to_section_nr(pfn2);
+}
+
+#else
+
+#define zone_pfn_same_memmap(pfn1, pfn2) (true)
+
+#endif
+
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index b2a81fd..003c52f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -46,11 +46,13 @@ static inline void unset_migratetype_isolate(struct page *page)
 {
 	__unset_migratetype_isolate(page, MIGRATE_MOVABLE);
 }
+
+/* The below functions must be run on a range from a single zone. */
 extern unsigned long alloc_contig_freed_pages(unsigned long start,
 					      unsigned long end, gfp_t flag);
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      gfp_t flags, unsigned migratetype);
-extern void free_contig_pages(struct page *page, int nr_pages);
+extern void free_contig_pages(unsigned long pfn, unsigned nr_pages);
 
 /*
  * For migration.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 46e78d4..bc200a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5725,31 +5725,46 @@ unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
 	int order;
 
 	VM_BUG_ON(!pfn_valid(start));
-	zone = page_zone(pfn_to_page(start));
+	page = pfn_to_page(start);
+	zone = page_zone(page);
 
 	spin_lock_irq(&zone->lock);
 
-	page = pfn_to_page(pfn);
 	for (;;) {
-		VM_BUG_ON(page_count(page) || !PageBuddy(page));
+		VM_BUG_ON(page_count(page) || !PageBuddy(page) ||
+			  page_zone(page) != zone);
+
 		list_del(&page->lru);
 		order = page_order(page);
+		count = 1UL << order;
 		zone->free_area[order].nr_free--;
 		rmv_page_order(page);
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
-		pfn += 1 << order;
+		__mod_zone_page_state(zone, NR_FREE_PAGES, -(long)count);
+
+		pfn += count;
 		if (pfn >= end)
 			break;
 		VM_BUG_ON(!pfn_valid(pfn));
-		page += 1 << order;
+
+		if (zone_pfn_same_memmap(pfn - count, pfn))
+			page += count;
+		else
+			page = pfn_to_page(pfn);
 	}
 
 	spin_unlock_irq(&zone->lock);
 
	/* After this,
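The invariant of this final loop can be checked outside the kernel with another self-contained sketch. As before, every name in it is a toy stand-in rather than the kernel's (SECTION_SHIFT, section_memmap, and the local pfn_to_page(), pfn_to_section_nr(), and zone_pfn_same_memmap()): each section's memmap is a separate allocation, so the struct page pointer may only be advanced arithmetically while the pfn stays within one section, and must be re-derived from the pfn whenever the same-memmap test reports a crossing.

/*
 * Toy model of sparsemem without vmemmap: one separately malloc'd
 * memmap chunk per section, so pointer arithmetic between chunks is
 * meaningless and the pointer must be re-resolved at section crossings.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define SECTION_SHIFT    4UL			/* toy: 16 pfns per section */
#define PFNS_PER_SECTION (1UL << SECTION_SHIFT)
#define NR_SECTIONS      4UL
#define NR_PFNS          (NR_SECTIONS * PFNS_PER_SECTION)

struct page { unsigned long pfn; };

static struct page *section_memmap[NR_SECTIONS];

static unsigned long pfn_to_section_nr(unsigned long pfn)
{
	return pfn >> SECTION_SHIFT;
}

static struct page *pfn_to_page(unsigned long pfn)
{
	return section_memmap[pfn_to_section_nr(pfn)] +
	       (pfn & (PFNS_PER_SECTION - 1));
}

static int zone_pfn_same_memmap(unsigned long pfn1, unsigned long pfn2)
{
	return pfn_to_section_nr(pfn1) == pfn_to_section_nr(pfn2);
}

int main(void)
{
	unsigned long pfn, count, s, end = NR_PFNS;
	struct page *page;
	int order = 2;			/* pretend every free block is order-2 */

	for (s = 0; s < NR_SECTIONS; s++) {
		section_memmap[s] = calloc(PFNS_PER_SECTION,
					   sizeof(struct page));
		assert(section_memmap[s]);
	}
	for (pfn = 0; pfn < NR_PFNS; pfn++)
		pfn_to_page(pfn)->pfn = pfn;

	pfn = 0;
	page = pfn_to_page(pfn);
	for (;;) {
		assert(page->pfn == pfn);	/* pointer and pfn agree */

		count = 1UL << order;		/* size of the block just taken */
		pfn += count;
		if (pfn >= end)
			break;

		if (zone_pfn_same_memmap(pfn - count, pfn))
			page += count;		/* same section: ++ is safe */
		else
			page = pfn_to_page(pfn); /* crossed: re-resolve */
	}
	puts("walk stayed consistent across section boundaries");
	return 0;
}

This mirrors the loop in the patch one-to-one, with the order fixed instead of read from the buddy metadata; the else branch fires exactly once per section crossing, which is the occasional pfn_to_page() Dave asked for.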
Re: [PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added
On Wed, 2011-09-21 at 18:26 +0200, Michal Nazarewicz wrote:
> -		page += 1 << order;
> +
> +		if (zone_pfn_same_memmap(pfn - count, pfn))
> +			page += count;
> +		else
> +			page = pfn_to_page(pfn);
>  	}

That all looks sane to me and should fix the bug I brought up.

-- Dave