The write argument to cmpxchg_double() must be 16-byte aligned.
We used to align 'struct page' itself in order to guarantee this,
but that wastes 8 bytes per page.  Instead, we reuse the 8 bytes
just before page->counters and move the freelist pointer between
that slot and the existing 8 bytes after ->counters.  That way,
no matter how 'struct page' itself is aligned, we can always find
a 16-byte-aligned area with which to do this cmpxchg.
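
To illustrate the slot choice, here is a minimal userspace sketch of
the idea (the struct and function names below are hypothetical
stand-ins, not the kernel code): whichever of the two 8-byte slots
surrounding ->counters forms a contiguous, 16-byte-aligned pair with
it gets used as the freelist.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the relevant part of 'struct page'. */
struct fake_page {
	unsigned long flags;
	void *slot_before;	/* plays the role of _freelist_first */
	unsigned long counters;
	void *slot_after;	/* plays the role of _freelist_second */
};

static int doubleword_aligned(void *ptr)
{
	return ((uintptr_t)ptr % (2 * sizeof(unsigned long))) == 0;
}

/*
 * Pick whichever freelist slot forms a contiguous,
 * 16-byte-aligned pair with ->counters.
 */
static void **pick_freelist_slot(struct fake_page *p)
{
	if (doubleword_aligned(&p->counters))
		return &p->slot_after;
	return &p->slot_before;
}

int main(void)
{
	/* Force one copy onto a 16-byte boundary and one 8 bytes off it. */
	static _Alignas(16) unsigned char buf[sizeof(struct fake_page) + 8];
	struct fake_page *even = (struct fake_page *)buf;
	struct fake_page *odd = (struct fake_page *)(buf + 8);

	printf("16-byte-aligned page: freelist slot %s counters\n",
	       pick_freelist_slot(even) == &even->slot_after ? "after" : "before");
	printf("8-byte-aligned page:  freelist slot %s counters\n",
	       pick_freelist_slot(odd) == &odd->slot_after ? "after" : "before");
	return 0;
}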

---

 linux.git-davehans/include/linux/mm_types.h |   17 +++++--
 linux.git-davehans/mm/slab.c                |    2 
 linux.git-davehans/mm/slab.h                |    1 
 linux.git-davehans/mm/slob.c                |    2 
 linux.git-davehans/mm/slub.c                |   67 +++++++++++++++++++++++-----
 5 files changed, 74 insertions(+), 15 deletions(-)
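
Note: cmpxchg_double() operates on its first pointer plus the
adjacent 8 bytes as a single 16-byte unit, which is why the aligned
member must always be passed first.  A rough userspace approximation
using the GCC/Clang __atomic builtins on unsigned __int128 (assumes
little-endian; needs -mcx16 on x86-64; an illustration only, not the
kernel implementation):

#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical stand-in for cmpxchg_double(): atomically
 * compare-and-swap the two adjacent 8-byte words starting at
 * 'ptr1'.  'ptr1' must be 16-byte aligned and 'ptr2' must be
 * ptr1 + 8; the pair is treated as one 16-byte unit
 * (cmpxchg16b on x86-64).
 */
static bool fake_cmpxchg_double(unsigned long *ptr1, unsigned long *ptr2,
				unsigned long old1, unsigned long old2,
				unsigned long new1, unsigned long new2)
{
	/* Pack (low word at ptr1, high word at ptr2), little-endian. */
	unsigned __int128 expected = ((unsigned __int128)old2 << 64) | old1;
	unsigned __int128 desired = ((unsigned __int128)new2 << 64) | new1;

	/* Reject misuse: the pair must be adjacent and 16-byte aligned. */
	if ((char *)ptr2 != (char *)ptr1 + 8 || ((uintptr_t)ptr1 & 15))
		return false;

	return __atomic_compare_exchange_n((unsigned __int128 *)ptr1,
					   &expected, desired, false,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

With the freelist slot chosen as in slab_freelist_ptr() below,
whichever of &page->counters / freelist is 16-byte aligned is the
one passed as the first argument.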

diff -puN include/linux/mm_types.h~move-around-freelist-to-align include/linux/mm_types.h
--- linux.git/include/linux/mm_types.h~move-around-freelist-to-align	2013-12-11 13:19:54.334963497 -0800
+++ linux.git-davehans/include/linux/mm_types.h	2013-12-11 13:19:54.344963939 -0800
@@ -140,11 +140,20 @@ struct slab_page {
        /* First double word block */
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
-       void *s_mem;                    /* slab first object */
+       union {
+               void *s_mem;                    /* slab first object */
+               /*
+                * The combination of ->counters and the freelist
+                * pointer needs to be doubleword-aligned for
+                * slub's cmpxchg_double() to work properly.
+                * slub does not use 's_mem', so we reuse its slot
+                * to guarantee an aligned pair no matter how
+                * struct page itself is aligned.
+                */
+               void *_freelist_first;          /* sl[aou]b first free object */
+       };
 
        /* Second double word */
-       void *_freelist;                /* sl[aou]b first free object */
-
        union {
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
        defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -173,6 +182,8 @@ struct slab_page {
                unsigned int active;    /* SLAB */
        };
 
+       void *_freelist_second;         /* slub first free object (aligned ->counters case) */
+
        /* Third double word block */
        union {
                struct {                /* slub per cpu partial pages */
diff -puN mm/slab.c~move-around-freelist-to-align mm/slab.c
--- linux.git/mm/slab.c~move-around-freelist-to-align	2013-12-11 13:19:54.335963541 -0800
+++ linux.git-davehans/mm/slab.c        2013-12-11 13:19:54.345963983 -0800
@@ -1952,7 +1952,7 @@ static void slab_destroy_debugcheck(stru
 
 static inline unsigned int **slab_freelist_ptr(struct slab_page *page)
 {
-       return (unsigned int **)&page->_freelist;
+       return (unsigned int **)&page->_freelist_first;
 }
 
 static inline unsigned int *slab_freelist(struct slab_page *page)
diff -puN mm/slab.h~move-around-freelist-to-align mm/slab.h
--- linux.git/mm/slab.h~move-around-freelist-to-align	2013-12-11 13:19:54.337963630 -0800
+++ linux.git-davehans/mm/slab.h        2013-12-11 13:19:54.346964027 -0800
@@ -278,3 +278,4 @@ struct kmem_cache_node {
 
 void *slab_next(struct seq_file *m, void *p, loff_t *pos);
 void slab_stop(struct seq_file *m, void *p);
+
diff -puN mm/slob.c~move-around-freelist-to-align mm/slob.c
--- linux.git/mm/slob.c~move-around-freelist-to-align	2013-12-11 13:19:54.339963718 -0800
+++ linux.git-davehans/mm/slob.c        2013-12-11 13:19:54.346964027 -0800
@@ -213,7 +213,7 @@ static void slob_free_pages(void *b, int
 
 static inline void **slab_freelist_ptr(struct slab_page *sp)
 {
-       return &sp->_freelist;
+       return &sp->_freelist_first;
 }
 
 static inline void *slab_freelist(struct slab_page *sp)
diff -puN mm/slub.c~move-around-freelist-to-align mm/slub.c
--- linux.git/mm/slub.c~move-around-freelist-to-align	2013-12-11 13:19:54.340963762 -0800
+++ linux.git-davehans/mm/slub.c        2013-12-11 13:19:54.348964116 -0800
@@ -228,9 +228,23 @@ static inline void stat(const struct kme
 #endif
 }
 
-static inline void **slab_freelist_ptr(struct slab_page *spage)
+static inline bool ptr_doubleword_aligned(void *ptr)
 {
-       return &spage->_freelist;
+       int doubleword_bytes = BITS_PER_LONG * 2 / 8;
+       if (PTR_ALIGN(ptr, doubleword_bytes) == ptr)
+               return true;
+       return false;
+}
+
+static inline void **slab_freelist_ptr(struct slab_page *spage)
+{
+       /*
+        * If counters is aligned, then we use the ->freelist
+        * slot _after_ it.
+        */
+       if (ptr_doubleword_aligned(&spage->counters))
+               return &spage->_freelist_second;
+       return &spage->_freelist_first;
 }
 
 static inline void *slab_freelist(struct slab_page *spage)
@@ -380,6 +394,39 @@ static __always_inline void slab_unlock(
        __bit_spin_unlock(PG_locked, &page->flags);
 }
 
+/*
+ * Take two adjacent 8-byte-aligned, but not necessarily
+ * doubleword-aligned, arguments and swap them around to
+ * guarantee that the first arg is doubleword-aligned.
+ *
+ * The write argument to cmpxchg_double() must be 16-byte
+ * aligned.  We used to align 'struct page' itself to
+ * guarantee this, but that wastes 8 bytes per page.
+ * Instead, we reuse the 8 bytes just before page->counters
+ * and move the freelist pointer between that slot and the
+ * existing 8 bytes after counters.  That way, no matter
+ * how 'struct page' itself is aligned, we can always find
+ * a 16-byte-aligned area with which to do this cmpxchg.
+ */
+static inline bool __cmpxchg_double_slab_unaligned(struct slab_page *page,
+               void *freelist_old, unsigned long counters_old,
+               void *freelist_new, unsigned long counters_new)
+{
+       void **freelist = slab_freelist_ptr(page);
+       if (ptr_doubleword_aligned(&page->counters)) {
+               if (cmpxchg_double(&page->counters, freelist,
+                       counters_old, freelist_old,
+                       counters_new, freelist_new))
+                       return true;
+       } else {
+               if (cmpxchg_double(freelist, &page->counters,
+                       freelist_old, counters_old,
+                       freelist_new, counters_new))
+                       return true;
+       }
+       return false;
+}
+
 /* Interrupts must be disabled (for the fallback code to work right) */
 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab_page *page,
                void *freelist_old, unsigned long counters_old,
@@ -390,10 +437,10 @@ static inline bool __cmpxchg_double_slab
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
-               if (cmpxchg_double(slab_freelist_ptr(page), &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+               if (__cmpxchg_double_slab_unaligned(page,
+                               freelist_old, counters_old,
+                               freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
@@ -426,10 +473,10 @@ static inline bool cmpxchg_double_slab(s
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
-               if (cmpxchg_double(slab_freelist_ptr(page), &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+               if (__cmpxchg_double_slab_unaligned(page,
+                               freelist_old, counters_old,
+                               freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
_