This patch introduces a new flag, SLAB_MINIMIZE_WASTE, for slab and slub.
The flag makes the allocators use higher-order slab pages when that reduces
wasted space.

This is needed because we want to use dm-bufio for the deduplication index,
and there are existing installations with non-power-of-two block sizes (such
as 640KB). The performance of the whole solution depends on efficient memory
use, so we must waste as little memory as possible.
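
As an illustration (this sketch is not part of the patch), a user of the
new flag would look roughly like the code below. The cache name, the 640KB
block size and the helper functions are hypothetical; a vmalloc fallback is
just one way to satisfy the requirement that allocation failures be handled
gracefully:

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>		/* is_vmalloc_addr() */

/* Hypothetical 640KB cache, mirroring the dm-bufio use case above. */
static struct kmem_cache *example_cache;

static int example_init(void)
{
	example_cache = kmem_cache_create("example-640k", 640 << 10,
					  ARCH_KMALLOC_MINALIGN,
					  SLAB_RECLAIM_ACCOUNT | SLAB_MINIMIZE_WASTE,
					  NULL);
	return example_cache ? 0 : -ENOMEM;
}

static void *example_alloc_block(void)
{
	/*
	 * With SLAB_MINIMIZE_WASTE the slab may be backed by a high-order
	 * page, so this allocation can fail; fall back to vmalloc().
	 */
	void *buf = kmem_cache_alloc(example_cache,
				     GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);

	return buf ? buf : vmalloc(640 << 10);
}

static void example_free_block(void *buf)
{
	if (is_vmalloc_addr(buf))
		vfree(buf);
	else
		kmem_cache_free(example_cache, buf);
}

Passing __GFP_NORETRY and __GFP_NOWARN keeps a failed high-order attempt
cheap and quiet, which is the behaviour the flag's comment asks callers to
be prepared for.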

Signed-off-by: Mikulas Patocka <mpato...@redhat.com>

---
 drivers/md/dm-bufio.c |    2 +-
 include/linux/slab.h  |    7 +++++++
 mm/slab.c             |    4 ++--
 mm/slab.h             |    7 ++++---
 mm/slab_common.c      |    2 +-
 mm/slub.c             |   25 ++++++++++++++++++++-----
 6 files changed, 35 insertions(+), 12 deletions(-)

Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h 2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/include/linux/slab.h      2018-04-16 21:10:45.000000000 +0200
@@ -108,6 +108,13 @@
 #define SLAB_KASAN             0
 #endif
 
+/*
+ * Use higher order allocations to minimize wasted space.
+ * Note: the allocation is unreliable if this flag is used, the caller
+ * must handle allocation failures gracefully.
+ */
+#define SLAB_MINIMIZE_WASTE    ((slab_flags_t __force)0x10000000U)
+
 /* The following flags affect the page allocator grouping pages by mobility */
 /* Objects are reclaimable */
 #define SLAB_RECLAIM_ACCOUNT   ((slab_flags_t __force)0x00020000U)
Index: linux-2.6/mm/slab_common.c
===================================================================
--- linux-2.6.orig/mm/slab_common.c     2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab_common.c  2018-04-16 21:10:45.000000000 +0200
@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_d
                SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
-                        SLAB_ACCOUNT)
+                        SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c    2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slub.c 2018-04-16 21:12:41.000000000 +0200
@@ -3249,7 +3249,7 @@ static inline unsigned int slab_order(un
        return order;
 }
 
-static inline int calculate_order(unsigned int size, unsigned int reserved)
+static inline int calculate_order(unsigned int size, unsigned int reserved, slab_flags_t flags)
 {
        unsigned int order;
        unsigned int min_objects;
@@ -3277,7 +3277,7 @@ static inline int calculate_order(unsign
                        order = slab_order(size, min_objects,
                                        slub_max_order, fraction, reserved);
                        if (order <= slub_max_order)
-                               return order;
+                               goto ret_order;
                        fraction /= 2;
                }
                min_objects--;
@@ -3289,15 +3289,30 @@ static inline int calculate_order(unsign
         */
        order = slab_order(size, 1, slub_max_order, 1, reserved);
        if (order <= slub_max_order)
-               return order;
+               goto ret_order;
 
        /*
         * Doh this slab cannot be placed using slub_max_order.
         */
        order = slab_order(size, 1, MAX_ORDER, 1, reserved);
        if (order < MAX_ORDER)
-               return order;
+               goto ret_order;
        return -ENOSYS;
+
+ret_order:
+       if (flags & SLAB_MINIMIZE_WASTE) {
+               /* Increase the order if it decreases waste */
+               int test_order;
+               for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+                       unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
+                       unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
+                       if (test_order_objects >= min(32, MAX_OBJS_PER_PAGE))
+                               break;
+                       if (test_order_objects > order_objects << (test_order - order))
+                               order = test_order;
+               }
+       }
+       return order;
 }
 
 static void
@@ -3562,7 +3577,7 @@ static int calculate_sizes(struct kmem_c
        if (forced_order >= 0)
                order = forced_order;
        else
-               order = calculate_order(size, s->reserved);
+               order = calculate_order(size, s->reserved, flags);
 
        if ((int)order < 0)
                return 0;
Index: linux-2.6/drivers/md/dm-bufio.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.c        2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/drivers/md/dm-bufio.c     2018-04-16 21:11:23.000000000 +0200
@@ -1683,7 +1683,7 @@ struct dm_bufio_client *dm_bufio_client_
            (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
                snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
                c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
-                                                 SLAB_RECLAIM_ACCOUNT, NULL);
+                                                 SLAB_RECLAIM_ACCOUNT | SLAB_MINIMIZE_WASTE, NULL);
                if (!c->slab_cache) {
                        r = -ENOMEM;
                        goto bad;
Index: linux-2.6/mm/slab.h
===================================================================
--- linux-2.6.orig/mm/slab.h    2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab.h 2018-04-16 21:10:45.000000000 +0200
@@ -142,10 +142,10 @@ static inline slab_flags_t kmem_cache_fl
 #if defined(CONFIG_SLAB)
 #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
                          SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
-                         SLAB_ACCOUNT)
+                         SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #elif defined(CONFIG_SLUB)
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
-                         SLAB_TEMPORARY | SLAB_ACCOUNT)
+                         SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #else
 #define SLAB_CACHE_FLAGS (0)
 #endif
@@ -164,7 +164,8 @@ static inline slab_flags_t kmem_cache_fl
                              SLAB_NOLEAKTRACE | \
                              SLAB_RECLAIM_ACCOUNT | \
                              SLAB_TEMPORARY | \
-                             SLAB_ACCOUNT)
+                             SLAB_ACCOUNT | \
+                             SLAB_MINIMIZE_WASTE)
 
 bool __kmem_cache_empty(struct kmem_cache *);
 int __kmem_cache_shutdown(struct kmem_cache *);
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c    2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab.c 2018-04-16 21:10:45.000000000 +0200
@@ -1790,14 +1790,14 @@ static size_t calculate_slab_order(struc
                 * as GFP_NOFS and we really don't want to have to be allocating
                 * higher-order pages when we are unable to shrink dcache.
                 */
-               if (flags & SLAB_RECLAIM_ACCOUNT)
+               if (flags & SLAB_RECLAIM_ACCOUNT && !(flags & SLAB_MINIMIZE_WASTE))
                        break;
 
                /*
                 * Large number of objects is good, but very large slabs are
                 * currently bad for the gfp()s.
                 */
-               if (gfporder >= slab_max_order)
+               if (gfporder >= slab_max_order && !(flags & SLAB_MINIMIZE_WASTE))
                        break;
 
                /*
