[PATCH v2 09/11] mm/slab: separate cache_grow() to two parts

2016-04-11 Thread js1304
From: Joonsoo Kim 

This is a preparation step to implement a lockless allocation path for
when there are no free objects in the kmem_cache.  What we'd like to do
here is to refill the cpu cache without holding the node lock.  To
accomplish this, the refill should be done after the new slab is
allocated but before it is attached to the management list.  So, this
patch separates cache_grow() into two parts, allocation and attaching to
the list, in order to add some code in between them in the following
patch.
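
As a rough sketch (not part of this patch), the intended calling pattern
after the split looks like the following.  The refill step in the middle
is only a placeholder for what a later patch in this series adds:

    page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
    if (page) {
            /*
             * Refill the cpu cache from the freshly allocated slab
             * here, without taking n->list_lock.  This step is added
             * by a later patch and is omitted in this sketch.
             */
    }
    cache_grow_end(cachep, page);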

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 1910589..2c28ad5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -213,6 +213,11 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
+static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
+   void **list);
+static inline void fixup_slab_list(struct kmem_cache *cachep,
+   struct kmem_cache_node *n, struct page *page,
+   void **list);
 static int slab_early_init = 1;
 
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
@@ -1797,7 +1802,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 
/*
 * Needed to avoid possible looping condition
-* in cache_grow()
+* in cache_grow_begin()
 */
if (OFF_SLAB(freelist_cache))
continue;
@@ -2543,7 +2548,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *cache_grow_begin(struct kmem_cache *cachep,
+   gfp_t flags, int nodeid)
 {
void *freelist;
size_t offset;
@@ -2609,21 +2615,40 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   check_irq_off();
-   spin_lock(&n->list_lock);
 
-   /* Make slab active. */
-   list_add_tail(&page->lru, &(n->slabs_free));
-   STATS_INC_GROWN(cachep);
-   n->free_objects += cachep->num;
-   spin_unlock(&n->list_lock);
-   return page_node;
+   return page;
+
 opps1:
kmem_freepages(cachep, page);
 failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   return -1;
+   return NULL;
+}
+
+static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+{
+   struct kmem_cache_node *n;
+   void *list = NULL;
+
+   check_irq_off();
+
+   if (!page)
+   return;
+
+   INIT_LIST_HEAD(&page->lru);
+   n = get_node(cachep, page_to_nid(page));
+
+   spin_lock(&n->list_lock);
+   if (!page->active)
+   list_add_tail(&page->lru, &(n->slabs_free));
+   else
+   fixup_slab_list(cachep, n, page, &list);
+   STATS_INC_GROWN(cachep);
+   n->free_objects += cachep->num - page->active;
+   spin_unlock(&n->list_lock);
+
+   fixup_objfreelist_debug(cachep, &list);
 }
 
 #if DEBUG
@@ -2834,6 +2859,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
struct array_cache *ac;
int node;
void *list = NULL;
+   struct page *page;
 
check_irq_off();
node = numa_mem_id();
@@ -2861,7 +2887,6 @@ retry:
}
 
while (batchcount > 0) {
-   struct page *page;
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
@@ -2894,8 +2919,6 @@ alloc_done:
fixup_objfreelist_debug(cachep, &list);
 
if (unlikely(!ac->avail)) {
-   int x;
-
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
@@ -2904,14 +2927,18 @@ alloc_done:
return obj;
}
 
-   x = cache_grow(cachep, gfp_exact_node(flags), node);
+   page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
+   cache_grow_end(cachep, page);
 
-   /* cache_grow can reenable interrupts, then ac could change. */
+   /*
+* cache_grow_begin() can reenable interrupts,
+* then ac could change.
+*/
ac = cpu_cache_get(cachep);