[PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added

2011-09-21 Thread Michal Nazarewicz
From: Michal Nazarewicz <min...@mina86.com>

Signed-off-by: Michal Nazarewicz <min...@mina86.com>

---
 include/linux/page-isolation.h |4 ++-
 mm/page_alloc.c|   66 ++-
 2 files changed, 60 insertions(+), 10 deletions(-)

 On Fri, 2011-08-19 at 16:27 +0200, Marek Szyprowski wrote:
 +unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
 +                                        gfp_t flag)
 +{
 +        unsigned long pfn = start, count;
 +        struct page *page;
 +        struct zone *zone;
 +        int order;
 +
 +        VM_BUG_ON(!pfn_valid(start));
 +        zone = page_zone(pfn_to_page(start));

On Thu, 08 Sep 2011 20:05:52 +0200, Dave Hansen <d...@linux.vnet.ibm.com> wrote:
 This implies that start-end are entirely contained in a single zone.
 What enforces that?  If some higher layer enforces that, I think we
 probably need at least a VM_BUG_ON() in here and a comment about who
 enforces it.

 +        spin_lock_irq(&zone->lock);
 +
 +        page = pfn_to_page(pfn);
 +        for (;;) {
 +                VM_BUG_ON(page_count(page) || !PageBuddy(page));
 +                list_del(&page->lru);
 +                order = page_order(page);
 +                zone->free_area[order].nr_free--;
 +                rmv_page_order(page);
 +                __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 +                pfn += 1 << order;
 +                if (pfn >= end)
 +                        break;
 +                VM_BUG_ON(!pfn_valid(pfn));
 +                page += 1 << order;
 +        }

 This 'struct page *'++ stuff is OK, but only for small, aligned areas.
 For at least some of the sparsemem modes (non-VMEMMAP), you could walk
 off of the end of the section_mem_map[] when you cross a MAX_ORDER
 boundary.  I'd feel a little bit more comfortable if pfn_to_page() was
 being done each time, or only occasionally when you cross a section
 boundary.
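
A minimal sketch of the kind of loop being asked for here, assuming classic
sparsemem (so pfn_to_section_nr() is available) and a range the caller has
already validated to sit in a single zone.  The function name is made up for
illustration; the helpers are the ones used in the quoted hunk, and the
revised patch later in the thread ends up taking essentially this approach:

  static unsigned long sketch_alloc_contig_freed_pages(unsigned long start,
                                                       unsigned long end)
  {
          unsigned long pfn = start;
          struct page *page = pfn_to_page(pfn);
          struct zone *zone = page_zone(page);
          int order;

          spin_lock_irq(&zone->lock);
          for (;;) {
                  /* every buddy we grab must be free and in the locked zone */
                  VM_BUG_ON(page_count(page) || !PageBuddy(page) ||
                            page_zone(page) != zone);
                  list_del(&page->lru);
                  order = page_order(page);
                  zone->free_area[order].nr_free--;
                  rmv_page_order(page);
                  __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
                  pfn += 1UL << order;
                  if (pfn >= end)
                          break;
                  VM_BUG_ON(!pfn_valid(pfn));
                  /* reuse the pointer only within one section's mem_map */
                  if (pfn_to_section_nr(pfn) ==
                      pfn_to_section_nr(pfn - (1UL << order)))
                          page += 1UL << order;
                  else
                          page = pfn_to_page(pfn);
          }
          spin_unlock_irq(&zone->lock);
          return pfn;
  }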

Do the attached changes seem to make sense?

I wanted to avoid calling pfn_to_page() each time since it seems fairly
expensive in the sparsemem and discontig modes.  At the same time, the
macro trickery ensures that users of sparsemem-vmemmap and flatmem won't
have to pay the price.
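
For context, pfn_to_page() expands to roughly the following under the
different memory models (paraphrased from include/asm-generic/memory_model.h
of that era, and renamed here for illustration): flatmem and
sparsemem-vmemmap are pure pointer arithmetic, while classic sparsemem and
discontigmem go through a section or node lookup on every call, which is the
cost being avoided.

  /* FLATMEM: one global mem_map[], plain array indexing. */
  #define __pfn_to_page_flat(pfn)      (mem_map + ((pfn) - ARCH_PFN_OFFSET))

  /* SPARSEMEM_VMEMMAP: virtually contiguous struct page array, also plain. */
  #define __pfn_to_page_vmemmap(pfn)   (vmemmap + (pfn))

  /* Classic SPARSEMEM: look up the section's mem_map chunk every time. */
  #define __pfn_to_page_sparse(pfn)                               \
  ({      unsigned long __pfn = (pfn);                            \
          struct mem_section *__sec = __pfn_to_section(__pfn);    \
          __section_mem_map_addr(__sec) + __pfn;                  \
  })

  /* DISCONTIGMEM: resolve the node, then index its node_mem_map. */
  #define __pfn_to_page_discontig(pfn)                            \
  ({      unsigned long __pfn = (pfn);                            \
          unsigned long __nid = arch_pfn_to_nid(__pfn);           \
          NODE_DATA(__nid)->node_mem_map +                        \
                  arch_local_page_offset(__pfn, __nid);           \
  })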

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index b2a81fd..003c52f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -46,11 +46,13 @@ static inline void unset_migratetype_isolate(struct page *page)
 {
__unset_migratetype_isolate(page, MIGRATE_MOVABLE);
 }
+
+/* The below functions must be run on a range from a single zone. */
 extern unsigned long alloc_contig_freed_pages(unsigned long start,
  unsigned long end, gfp_t flag);
 extern int alloc_contig_range(unsigned long start, unsigned long end,
  gfp_t flags, unsigned migratetype);
-extern void free_contig_pages(struct page *page, int nr_pages);
+extern void free_contig_pages(unsigned long pfn, unsigned nr_pages);
 
 /*
  * For migration.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 46e78d4..32fda5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5716,9 +5716,41 @@ out:
        spin_unlock_irqrestore(&zone->lock, flags);
 }
 
+#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/*
+ * In the FLATMEM and SPARSEMEM_VMEMMAP memory models we can safely increment
+ * the page pointer and get the same value that pfn_to_page() would return
+ * for the incremented pfn counter.
+ */
+#define __contig_next_page(page, pageblock_left, pfn, increment) \
+   ((page) + (increment))
+
+#define __contig_first_page(pageblock_left, pfn) pfn_to_page(pfn)
+
+#else
+
+/*
+ * If we cross a pageblock boundary, make sure we get a valid page pointer.  If
+ * we stay within a pageblock, incrementing the pointer is good enough, and is
+ * a bit of an optimisation.
+ */
+#define __contig_next_page(page, pageblock_left, pfn, increment)   \
+   (likely((pageblock_left) -= (increment)) ? (page) + (increment) \
+: (((pageblock_left) = pageblock_nr_pages), pfn_to_page(pfn)))
+
+#define __contig_first_page(pageblock_left, pfn) ( \
+   ((pageblock_left) = pageblock_nr_pages -\
+    ((pfn) & (pageblock_nr_pages - 1))),   \
+   pfn_to_page(pfn))
+
+
+#endif
+
 unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
   gfp_t flag)
 {
+   unsigned long pageblock_left __unused;
unsigned long pfn = start, count;
struct page *page;
struct zone *zone;
@@ -5729,27 +5761,37 @@ unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
 
        spin_lock_irq(&zone->lock);
 
-   page = pfn_to_page(pfn);
+   page = __contig_first_page(pageblock_left, pfn);
for (;;) {
-       VM_BUG_ON(page_count(page) || !PageBuddy(page));
+       VM_BUG_ON(page_count(page) || !PageBuddy(page) ||
+                 page_zone(page) != zone);

[PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added

2011-09-21 Thread Michal Nazarewicz
From: Michal Nazarewicz <min...@mina86.com>

Signed-off-by: Michal Nazarewicz <min...@mina86.com>

---
 include/asm-generic/memory_model.h |   17 ++
 include/linux/page-isolation.h |4 ++-
 mm/page_alloc.c|   43 +++
 3 files changed, 53 insertions(+), 11 deletions(-)

 On Wed, 2011-09-21 at 17:19 +0200, Michal Nazarewicz wrote:
 I wanted to avoid calling pfn_to_page() each time since it seems fairly
 expensive in the sparsemem and discontig modes.  At the same time, the
 macro trickery ensures that users of sparsemem-vmemmap and flatmem won't
 have to pay the price.

On Wed, 21 Sep 2011 17:45:59 +0200, Dave Hansen <d...@linux.vnet.ibm.com> wrote:
 Personally, I'd say the (incredibly minuscule) runtime cost is worth the
 cost of making folks' eyes bleed when they see those macros.  I think
 there are some nicer ways to do it.

Yeah.  I wasn't amazed by them either.

 Is there a reason you can't logically do?
   page = pfn_to_page(pfn);
   for (;;) {
   if (pfn_to_section_nr(pfn) == pfn_to_section_nr(pfn+1))
   page++;
   else
   page = pfn_to_page(pfn+1);
   }

Done.  Thanks for the suggestions!

 +#define __contig_next_page(page, pageblock_left, pfn, increment)\
 +(likely((pageblock_left) -= (increment)) ? (page) + (increment) \
 + : (((pageblock_left) = pageblock_nr_pages), pfn_to_page(pfn)))
 +
 +#define __contig_first_page(pageblock_left, pfn) (  \
 +((pageblock_left) = pageblock_nr_pages -\
 + ((pfn) & (pageblock_nr_pages - 1))),   \
 +pfn_to_page(pfn))
 +
 +#endif

 For the love of Pete, please make those into functions if you're going
 to keep them.

That was tricky because they modify pageblock_left.  Not relevant now
anyway, though.
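
For the record, the macros could have been written as inline helpers by
passing pageblock_left by pointer.  A hypothetical sketch of that form
(the helper names are invented here; moot now that the macros are gone):

  /* Helper-function form of __contig_next_page()/__contig_first_page(). */
  static inline struct page *contig_next_page(struct page *page,
                                              unsigned long *pageblock_left,
                                              unsigned long pfn,
                                              unsigned long increment)
  {
          *pageblock_left -= increment;
          if (likely(*pageblock_left))
                  return page + increment;       /* still inside pageblock */
          *pageblock_left = pageblock_nr_pages;  /* pfn is now block-aligned */
          return pfn_to_page(pfn);
  }

  static inline struct page *contig_first_page(unsigned long *pageblock_left,
                                               unsigned long pfn)
  {
          *pageblock_left = pageblock_nr_pages -
                            (pfn & (pageblock_nr_pages - 1));
          return pfn_to_page(pfn);
  }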

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index fb2d63f..900da88 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -69,6 +69,23 @@
 })
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/*
+ * Both PFNs must be from the same zone!  If this function returns
+ * true, pfn_to_page(pfn1) + (pfn2 - pfn1) == pfn_to_page(pfn2).
+ */
+static inline bool zone_pfn_same_memmap(unsigned long pfn1, unsigned long pfn2)
+{
+   return pfn_to_section_nr(pfn1) == pfn_to_section_nr(pfn2);
+}
+
+#else
+
+#define zone_pfn_same_memmap(pfn1, pfn2) (true)
+
+#endif
+
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index b2a81fd..003c52f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -46,11 +46,13 @@ static inline void unset_migratetype_isolate(struct page *page)
 {
__unset_migratetype_isolate(page, MIGRATE_MOVABLE);
 }
+
+/* The below functions must be run on a range from a single zone. */
 extern unsigned long alloc_contig_freed_pages(unsigned long start,
  unsigned long end, gfp_t flag);
 extern int alloc_contig_range(unsigned long start, unsigned long end,
  gfp_t flags, unsigned migratetype);
-extern void free_contig_pages(struct page *page, int nr_pages);
+extern void free_contig_pages(unsigned long pfn, unsigned nr_pages);
 
 /*
  * For migration.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 46e78d4..bc200a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5725,31 +5725,46 @@ unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end,
int order;
 
VM_BUG_ON(!pfn_valid(start));
-   zone = page_zone(pfn_to_page(start));
+   page = pfn_to_page(start);
+   zone = page_zone(page);
 
        spin_lock_irq(&zone->lock);
 
-   page = pfn_to_page(pfn);
for (;;) {
-   VM_BUG_ON(page_count(page) || !PageBuddy(page));
+       VM_BUG_ON(page_count(page) || !PageBuddy(page) ||
+                 page_zone(page) != zone);
+
        list_del(&page->lru);
order = page_order(page);
+   count = 1UL  order;
        zone->free_area[order].nr_free--;
rmv_page_order(page);
-       __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
-       pfn += 1 << order;
+   __mod_zone_page_state(zone, NR_FREE_PAGES, -(long)count);
+
+   pfn += count;
        if (pfn >= end)
break;
VM_BUG_ON(!pfn_valid(pfn));
-       page += 1 << order;
+
+   if (zone_pfn_same_memmap(pfn - count, pfn))
+   page += count;
+   else
+   page = pfn_to_page(pfn);
}
 
        spin_unlock_irq(&zone->lock);
 
/* After this, 

Re: [PATCH 1/3] fixup! mm: alloc_contig_freed_pages() added

2011-09-21 Thread Dave Hansen
On Wed, 2011-09-21 at 18:26 +0200, Michal Nazarewicz wrote:
 -       page += 1 << order;
 +
 +   if (zone_pfn_same_memmap(pfn - count, pfn))
 +   page += count;
 +   else
 +   page = pfn_to_page(pfn);
 }

That all looks sane to me and should fix the bug I brought up.

-- Dave
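
For completeness, the same section-aware walk applies to the pfn-based
free_contig_pages() declared in the page-isolation.h hunk above.  A minimal
sketch, not part of the posted patches, assuming the pfn/nr_pages pair
describes pages previously handed out by alloc_contig_freed_pages():

  void free_contig_pages(unsigned long pfn, unsigned nr_pages)
  {
          struct page *page = pfn_to_page(pfn);

          while (nr_pages--) {
                  __free_page(page);
                  ++pfn;
                  /* reuse the pointer only while the mem_map stays contiguous */
                  if (likely(zone_pfn_same_memmap(pfn - 1, pfn)))
                          ++page;
                  else
                          page = pfn_to_page(pfn);
          }
  }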
