[RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast

2013-08-30 Thread Srivatsa S. Bhat
One of the main advantages of this design of memory regions is that page
allocations can potentially be extremely fast, with almost no extra overhead
from memory regions.

To exploit that, introduce an optimized version of del_from_freelist(), which
utilizes the fact that we always delete items from the head of the list
during page allocation.

Basically, we want to keep a note of the region from which we are allocating
in a given freelist, to avoid having to compute the page-to-zone-region
mapping on every page allocation. So introduce a 'next_region' pointer in
every freelist to achieve that, and use it to keep the fastpath of page
allocation almost as fast as it would have been without memory regions.
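
To make the idea concrete, here is a minimal user-space sketch of the caching
scheme. It is an illustration only, not the kernel code: struct block,
struct region, struct freelist and the helper names below are simplified
stand-ins for pageblocks, struct mem_region_list and struct free_list, and
none of them exist in the kernel tree.

/*
 * Toy model of the 'next_region' cache, for illustration only.
 * A freelist remembers which region its head block belongs to, so
 * deleting from the head needs no page-to-region computation.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_REGIONS	4

struct block {				/* stand-in for a free pageblock */
	struct block *next;
	int region_id;
};

struct region {				/* stand-in for struct mem_region_list */
	int nr_free;			/* this region's blocks on the list */
};

struct freelist {			/* stand-in for struct free_list */
	struct block *head;
	struct region regions[MAX_REGIONS];
	struct region *next_region;	/* cached region of the head block */
};

/* Slowpath: recompute the cache from the (new) head of the list. */
static void set_next_region(struct freelist *fl)
{
	fl->next_region = fl->head ? &fl->regions[fl->head->region_id] : NULL;
}

/*
 * Fastpath delete: allocation always takes the head block, so the
 * cached next_region already names its region. Only when that region's
 * last block leaves the list do we fall back to the slowpath.
 */
static struct block *rmqueue_del(struct freelist *fl)
{
	struct block *b = fl->head;

	if (!b)
		return NULL;
	fl->head = b->next;
	if (--fl->next_region->nr_free == 0)	/* region drained */
		set_next_region(fl);
	return b;
}

int main(void)
{
	struct block b1 = { NULL, 1 }, b0 = { &b1, 0 };
	struct freelist fl = { .head = &b0 };

	fl.regions[0].nr_free = 1;
	fl.regions[1].nr_free = 1;
	set_next_region(&fl);

	assert(rmqueue_del(&fl)->region_id == 0);	/* drains region 0 */
	assert(fl.next_region == &fl.regions[1]);	/* cache re-aimed */
	printf("fastpath delete kept next_region coherent\n");
	return 0;
}

In this model the common case costs one decrement; the region lookup only
runs when a region's last block leaves the list, which mirrors the
fastpath/slowpath split in the patch below.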

Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com>
---

 include/linux/mm.h     |   14 ++++++++++++++
 include/linux/mmzone.h |    6 ++++++
 mm/page_alloc.c        |   62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52329d1..156d7db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,6 +747,20 @@ static inline int page_zone_region_id(const struct page *page)
 	return pgdat->node_regions[node_region_idx].zone_region_idx[z_num];
 }
 
+static inline void set_next_region_in_freelist(struct free_list *free_list)
+{
+	struct page *page;
+	int region_id;
+
+	if (unlikely(list_empty(&free_list->list))) {
+		free_list->next_region = NULL;
+	} else {
+		page = list_entry(free_list->list.next, struct page, lru);
+		region_id = page_zone_region_id(page);
+		free_list->next_region = &free_list->mr_list[region_id];
+	}
+}
+
 #ifdef SECTION_IN_PAGE_FLAGS
 static inline void set_page_section(struct page *page, unsigned long section)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 201ab42..932e71f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -92,6 +92,12 @@ struct free_list {
 	struct list_head	list;
 
 	/*
+	 * Pointer to the region from which the next allocation will be
+	 * satisfied. (Same as the freelist's first pageblock's region.)
+	 */
+	struct mem_region_list	*next_region; /* for fast page allocation */
+
+	/*
 	 * Demarcates pageblocks belonging to different regions within
 	 * this freelist.
 	 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07ac019..52b6655 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -548,6 +548,15 @@ static void add_to_freelist(struct page *page, struct free_list *free_list)
 	/* This is the first region, so add to the head of the list */
 	prev_region_list = &free_list->list;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	WARN((list_empty(&free_list->list) && free_list->next_region != NULL),
+	     "%s: next_region not NULL\n", __func__);
+#endif
+	/*
+	 * Set 'next_region' to this region, since this is the first region now.
+	 */
+	free_list->next_region = region;
+
 out:
list_add(lru, prev_region_list);
 
@@ -555,6 +564,47 @@ out:
region->page_block = lru;
 }
 
+/**
+ * __rmqueue_smallest() *always* deletes elements from the head of the
+ * list. Use this knowledge to keep page allocation fast, despite being
+ * region-aware.
+ *
+ * Do *NOT* call this function if you are deleting from somewhere deep
+ * inside the freelist.
+ */
+static void rmqueue_del_from_freelist(struct page *page,
+				      struct free_list *free_list)
+{
+	struct list_head *lru = &page->lru;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	WARN((free_list->list.next != lru),
+	     "%s: page not at head of list", __func__);
+#endif
+
+	list_del(lru);
+
+	/* Fastpath */
+	if (--(free_list->next_region->nr_free)) {
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		WARN(free_list->next_region->nr_free < 0,
+		     "%s: nr_free is negative\n", __func__);
+#endif
+		return;
+	}
+
+	/*
+	 * Slowpath, when this is the last pageblock of this region
+	 * in this freelist.
+	 */
+	free_list->next_region->page_block = NULL;
+
+	/* Set 'next_region' to the new first region in the freelist. */
+	set_next_region_in_freelist(free_list);
+}
+
+/* Generic delete function for region-aware buddy allocator. */
 static void del_from_freelist(struct page *page, struct free_list *free_list)
 {
struct list_head *prev_page_lru, *lru, *p;
@@ -562,6 +612,11 @@ static void del_from_freelist(struct page *page, struct free_list *free_list)
 	int region_id;
 
 	lru = &page->lru;
+
+	/* Try to fastpath, if deleting from the head of the list */
+	if (lru == free_list->list.next)
+		return rmqueue_del_from_freelist(page, free_list);
+
 	region_id = page_zone_region_id(page);
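
For contrast, here is what a generic delete looks like in the same toy model
introduced above (again an illustration only, not the kernel's
del_from_freelist): deleting from an arbitrary position has to look up the
victim's region itself, which is exactly the per-allocation cost the cached
next_region spares the head-of-list path.

/*
 * Companion to the toy model above: a delete that works anywhere in
 * the list. Unlike rmqueue_del(), it must look up the block's region
 * itself (the analogue of page_zone_region_id()).
 */
static struct block *generic_del(struct freelist *fl, struct block *victim)
{
	int was_head = (victim == fl->head);
	struct block **pp;

	for (pp = &fl->head; *pp; pp = &(*pp)->next) {
		if (*pp != victim)
			continue;
		*pp = victim->next;		/* unlink */
		/* per-delete region lookup: the cost the fastpath skips */
		fl->regions[victim->region_id].nr_free--;
		if (was_head)			/* head changed, refresh cache */
			set_next_region(fl);
		return victim;
	}
	return NULL;
}

This is why the patch dispatches to rmqueue_del_from_freelist() whenever the
page happens to be at the head of the list: the head check is a single
comparison, and it turns almost every allocation-time delete into the cheap
path.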
