[RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast
One of the main advantages of this design of memory regions is that page allocations can potentially be extremely fast - almost with no extra overhead from memory regions. To exploit that, introduce an optimized version of del_from_freelist(), which utilizes the fact that we always delete items from the head of the list during page allocation. Basically, we want to keep a note of the region from which we are allocating in a given freelist, to avoid having to compute the page-to-zone-region for every page allocation. So introduce a 'next_region' pointer in every freelist to achieve that, and use it to keep the fastpath of page allocation almost as fast as it would have been without memory regions. Signed-off-by: Srivatsa S. Bhat --- include/linux/mm.h | 14 +++ include/linux/mmzone.h | 6 + mm/page_alloc.c | 62 +++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 52329d1..156d7db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,6 +747,20 @@ static inline int page_zone_region_id(const struct page *page) return pgdat->node_regions[node_region_idx].zone_region_idx[z_num]; } +static inline void set_next_region_in_freelist(struct free_list *free_list) +{ + struct page *page; + int region_id; + + if (unlikely(list_empty(&free_list->list))) { + free_list->next_region = NULL; + } else { + page = list_entry(free_list->list.next, struct page, lru); + region_id = page_zone_region_id(page); + free_list->next_region = &free_list->mr_list[region_id]; + } +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 201ab42..932e71f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -92,6 +92,12 @@ struct free_list { struct list_head list; /* +* Pointer to the region from which the next allocation will be +* satisfied. (Same as the freelist's first pageblock's region.)
+*/ + struct mem_region_list *next_region; /* for fast page allocation */ + + /* * Demarcates pageblocks belonging to different regions within * this freelist. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ac019..52b6655 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -548,6 +548,15 @@ static void add_to_freelist(struct page *page, struct free_list *free_list) /* This is the first region, so add to the head of the list */ prev_region_list = &free_list->list; +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((list_empty(&free_list->list) && free_list->next_region != NULL), + "%s: next_region not NULL\n", __func__); +#endif + /* +* Set 'next_region' to this region, since this is the first region now +*/ + free_list->next_region = region; + out: list_add(lru, prev_region_list); @@ -555,6 +564,47 @@ out: region->page_block = lru; } +/** + * __rmqueue_smallest() *always* deletes elements from the head of the + * list. Use this knowledge to keep page allocation fast, despite being + * region-aware. + * + * Do *NOT* call this function if you are deleting from somewhere deep + * inside the freelist. + */ +static void rmqueue_del_from_freelist(struct page *page, + struct free_list *free_list) +{ + struct list_head *lru = &page->lru; + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((free_list->list.next != lru), + "%s: page not at head of list", __func__); +#endif + + list_del(lru); + + /* Fastpath */ + if (--(free_list->next_region->nr_free)) { + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN(free_list->next_region->nr_free < 0, + "%s: nr_free is negative\n", __func__); +#endif + return; + } + + /* +* Slowpath, when this is the last pageblock of this region +* in this freelist. +*/ + free_list->next_region->page_block = NULL; + + /* Set 'next_region' to the new first region in the freelist. */ + set_next_region_in_freelist(free_list); +} + +/* Generic delete function for region-aware buddy allocator.
*/ static void del_from_freelist(struct page *page, struct free_list *free_list) { struct list_head *prev_page_lru, *lru, *p; @@ -562,6 +612,11 @@ static void del_from_freelist(struct page *page, struct free_list *free_list) int region_id; lru = &page->lru; + + /* Try to fastpath, if deleting from the head of the list */ + if (lru == free_list->list.next) + return rmqueue_del_from_freelist(page, free_list); + region_id = page_zone_region_id(page);
[RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast
One of the main advantages of this design of memory regions is that page allocations can potentially be extremely fast - almost with no extra overhead from memory regions. To exploit that, introduce an optimized version of del_from_freelist(), which utilizes the fact that we always delete items from the head of the list during page allocation. Basically, we want to keep a note of the region from which we are allocating in a given freelist, to avoid having to compute the page-to-zone-region for every page allocation. So introduce a 'next_region' pointer in every freelist to achieve that, and use it to keep the fastpath of page allocation almost as fast as it would have been without memory regions. Signed-off-by: Srivatsa S. Bhat --- include/linux/mm.h | 14 +++ include/linux/mmzone.h | 6 + mm/page_alloc.c | 62 +++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 52329d1..156d7db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,6 +747,20 @@ static inline int page_zone_region_id(const struct page *page) return pgdat->node_regions[node_region_idx].zone_region_idx[z_num]; } +static inline void set_next_region_in_freelist(struct free_list *free_list) +{ + struct page *page; + int region_id; + + if (unlikely(list_empty(&free_list->list))) { + free_list->next_region = NULL; + } else { + page = list_entry(free_list->list.next, struct page, lru); + region_id = page_zone_region_id(page); + free_list->next_region = &free_list->mr_list[region_id]; + } +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 201ab42..932e71f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -92,6 +92,12 @@ struct free_list { struct list_head list; /* +* Pointer to the region from which the next allocation will be +* satisfied. (Same as the freelist's first pageblock's region.)
+*/ + struct mem_region_list *next_region; /* for fast page allocation */ + + /* * Demarcates pageblocks belonging to different regions within * this freelist. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ac019..52b6655 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -548,6 +548,15 @@ static void add_to_freelist(struct page *page, struct free_list *free_list) /* This is the first region, so add to the head of the list */ prev_region_list = &free_list->list; +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((list_empty(&free_list->list) && free_list->next_region != NULL), + "%s: next_region not NULL\n", __func__); +#endif + /* +* Set 'next_region' to this region, since this is the first region now +*/ + free_list->next_region = region; + out: list_add(lru, prev_region_list); @@ -555,6 +564,47 @@ out: region->page_block = lru; } +/** + * __rmqueue_smallest() *always* deletes elements from the head of the + * list. Use this knowledge to keep page allocation fast, despite being + * region-aware. + * + * Do *NOT* call this function if you are deleting from somewhere deep + * inside the freelist. + */ +static void rmqueue_del_from_freelist(struct page *page, + struct free_list *free_list) +{ + struct list_head *lru = &page->lru; + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((free_list->list.next != lru), + "%s: page not at head of list", __func__); +#endif + + list_del(lru); + + /* Fastpath */ + if (--(free_list->next_region->nr_free)) { + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN(free_list->next_region->nr_free < 0, + "%s: nr_free is negative\n", __func__); +#endif + return; + } + + /* +* Slowpath, when this is the last pageblock of this region +* in this freelist. +*/ + free_list->next_region->page_block = NULL; + + /* Set 'next_region' to the new first region in the freelist. */ + set_next_region_in_freelist(free_list); +} + +/* Generic delete function for region-aware buddy allocator.
*/ static void del_from_freelist(struct page *page, struct free_list *free_list) { struct list_head *prev_page_lru, *lru, *p; @@ -562,6 +612,11 @@ static void del_from_freelist(struct page *page, struct free_list *free_list) int region_id; lru = &page->lru; + + /* Try to fastpath, if deleting from the head of the list */ + if (lru == free_list->list.next) + return rmqueue_del_from_freelist(page, free_list); + region_id = page_zone_region_id(page);
[RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast
One of the main advantages of this design of memory regions is that page allocations can potentially be extremely fast - almost with no extra overhead from memory regions. To exploit that, introduce an optimized version of del_from_freelist(), which utilizes the fact that we always delete items from the head of the list during page allocation. Basically, we want to keep a note of the region from which we are allocating in a given freelist, to avoid having to compute the page-to-zone-region for every page allocation. So introduce a 'next_region' pointer in every freelist to achieve that, and use it to keep the fastpath of page allocation almost as fast as it would have been without memory regions. Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com> --- include/linux/mm.h | 14 +++ include/linux/mmzone.h | 6 + mm/page_alloc.c | 62 +++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 52329d1..156d7db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,6 +747,20 @@ static inline int page_zone_region_id(const struct page *page) return pgdat->node_regions[node_region_idx].zone_region_idx[z_num]; } +static inline void set_next_region_in_freelist(struct free_list *free_list) +{ + struct page *page; + int region_id; + + if (unlikely(list_empty(&free_list->list))) { + free_list->next_region = NULL; + } else { + page = list_entry(free_list->list.next, struct page, lru); + region_id = page_zone_region_id(page); + free_list->next_region = &free_list->mr_list[region_id]; + } +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 201ab42..932e71f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -92,6 +92,12 @@ struct free_list { struct list_head list; /* +* Pointer to the region from which the next allocation will be +* satisfied.
(Same as the freelist's first pageblock's region.) +*/ + struct mem_region_list *next_region; /* for fast page allocation */ + + /* * Demarcates pageblocks belonging to different regions within * this freelist. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ac019..52b6655 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -548,6 +548,15 @@ static void add_to_freelist(struct page *page, struct free_list *free_list) /* This is the first region, so add to the head of the list */ prev_region_list = &free_list->list; +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((list_empty(&free_list->list) && free_list->next_region != NULL), + "%s: next_region not NULL\n", __func__); +#endif + /* +* Set 'next_region' to this region, since this is the first region now +*/ + free_list->next_region = region; + out: list_add(lru, prev_region_list); @@ -555,6 +564,47 @@ out: region->page_block = lru; } +/** + * __rmqueue_smallest() *always* deletes elements from the head of the + * list. Use this knowledge to keep page allocation fast, despite being + * region-aware. + * + * Do *NOT* call this function if you are deleting from somewhere deep + * inside the freelist. + */ +static void rmqueue_del_from_freelist(struct page *page, + struct free_list *free_list) +{ + struct list_head *lru = &page->lru; + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((free_list->list.next != lru), + "%s: page not at head of list", __func__); +#endif + + list_del(lru); + + /* Fastpath */ + if (--(free_list->next_region->nr_free)) { + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN(free_list->next_region->nr_free < 0, + "%s: nr_free is negative\n", __func__); +#endif + return; + } + + /* +* Slowpath, when this is the last pageblock of this region +* in this freelist. +*/ + free_list->next_region->page_block = NULL; + + /* Set 'next_region' to the new first region in the freelist. */ + set_next_region_in_freelist(free_list); +} + +/* Generic delete function for region-aware buddy allocator.
*/ static void del_from_freelist(struct page *page, struct free_list *free_list) { struct list_head *prev_page_lru, *lru, *p; @@ -562,6 +612,11 @@ static void del_from_freelist(struct page *page, struct free_list *free_list) int region_id; lru = &page->lru; + + /* Try to fastpath, if deleting from the head of the list */ + if (lru == free_list->list.next) + return rmqueue_del_from_freelist(page, free_list); + region_id = page_zone_region_id(page);
[RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast
One of the main advantages of this design of memory regions is that page allocations can potentially be extremely fast - almost with no extra overhead from memory regions. To exploit that, introduce an optimized version of del_from_freelist(), which utilizes the fact that we always delete items from the head of the list during page allocation. Basically, we want to keep a note of the region from which we are allocating in a given freelist, to avoid having to compute the page-to-zone-region for every page allocation. So introduce a 'next_region' pointer in every freelist to achieve that, and use it to keep the fastpath of page allocation almost as fast as it would have been without memory regions. Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com> --- include/linux/mm.h | 14 +++ include/linux/mmzone.h | 6 + mm/page_alloc.c | 62 +++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 52329d1..156d7db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,6 +747,20 @@ static inline int page_zone_region_id(const struct page *page) return pgdat->node_regions[node_region_idx].zone_region_idx[z_num]; } +static inline void set_next_region_in_freelist(struct free_list *free_list) +{ + struct page *page; + int region_id; + + if (unlikely(list_empty(&free_list->list))) { + free_list->next_region = NULL; + } else { + page = list_entry(free_list->list.next, struct page, lru); + region_id = page_zone_region_id(page); + free_list->next_region = &free_list->mr_list[region_id]; + } +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 201ab42..932e71f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -92,6 +92,12 @@ struct free_list { struct list_head list; /* +* Pointer to the region from which the next allocation will be +* satisfied.
(Same as the freelist's first pageblock's region.) +*/ + struct mem_region_list *next_region; /* for fast page allocation */ + + /* * Demarcates pageblocks belonging to different regions within * this freelist. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ac019..52b6655 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -548,6 +548,15 @@ static void add_to_freelist(struct page *page, struct free_list *free_list) /* This is the first region, so add to the head of the list */ prev_region_list = &free_list->list; +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((list_empty(&free_list->list) && free_list->next_region != NULL), + "%s: next_region not NULL\n", __func__); +#endif + /* +* Set 'next_region' to this region, since this is the first region now +*/ + free_list->next_region = region; + out: list_add(lru, prev_region_list); @@ -555,6 +564,47 @@ out: region->page_block = lru; } +/** + * __rmqueue_smallest() *always* deletes elements from the head of the + * list. Use this knowledge to keep page allocation fast, despite being + * region-aware. + * + * Do *NOT* call this function if you are deleting from somewhere deep + * inside the freelist. + */ +static void rmqueue_del_from_freelist(struct page *page, + struct free_list *free_list) +{ + struct list_head *lru = &page->lru; + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN((free_list->list.next != lru), + "%s: page not at head of list", __func__); +#endif + + list_del(lru); + + /* Fastpath */ + if (--(free_list->next_region->nr_free)) { + +#ifdef CONFIG_DEBUG_PAGEALLOC + WARN(free_list->next_region->nr_free < 0, + "%s: nr_free is negative\n", __func__); +#endif + return; + } + + /* +* Slowpath, when this is the last pageblock of this region +* in this freelist. +*/ + free_list->next_region->page_block = NULL; + + /* Set 'next_region' to the new first region in the freelist. */ + set_next_region_in_freelist(free_list); +} + +/* Generic delete function for region-aware buddy allocator.
*/ static void del_from_freelist(struct page *page, struct free_list *free_list) { struct list_head *prev_page_lru, *lru, *p; @@ -562,6 +612,11 @@ static void del_from_freelist(struct page *page, struct free_list *free_list) int region_id; lru = &page->lru; + + /* Try to fastpath, if deleting from the head of the list */ + if (lru == free_list->list.next) + return rmqueue_del_from_freelist(page, free_list); + region_id = page_zone_region_id(page);