This patch adds compaction support to zsmalloc so that users can
trigger it by calling zs_compact(pool); a short usage sketch follows
the policy list below.

The migration policy is as follows:

1. Find the objects to migrate in ZS_ALMOST_EMPTY zspages.
2. Find free space in ZS_ALMOST_FULL zspages; if none is found, look in ZS_ALMOST_EMPTY instead.
3. Migrate the objects found in step 1 into the free space found in step 2.
4. Repeat steps 1-3 for each size class.
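
For illustration only, a pool owner such as zram could drive compaction
with something like the sketch below. The wrapper name and the pr_info()
message are made up for this example; only zs_compact() itself is added
by this patch.

  #include <linux/printk.h>
  #include <linux/zsmalloc.h>

  /*
   * Hypothetical helper: compact @pool and report progress.
   * zs_compact() walks every size class, migrates objects out of
   * ZS_ALMOST_EMPTY zspages and returns the number of objects it
   * moved, so that emptied zspages can be freed back to the system.
   */
  static unsigned long try_pool_compaction(struct zs_pool *pool)
  {
          unsigned long nr_migrated = zs_compact(pool);

          pr_info("zsmalloc: migrated %lu objects\n", nr_migrated);
          return nr_migrated;
  }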

Signed-off-by: Minchan Kim <[email protected]>
---
 include/linux/zsmalloc.h |   1 +
 mm/zsmalloc.c            | 344 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 330 insertions(+), 15 deletions(-)

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 05c214760977..04ecd3fc4283 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -47,5 +47,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 
 unsigned long zs_get_total_pages(struct zs_pool *pool);
+unsigned long zs_compact(struct zs_pool *pool);
 
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 16c40081c22e..304595d97610 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -227,6 +227,7 @@ struct zs_pool {
        struct size_class **size_class;
        struct size_class *handle_class;
 
+       rwlock_t  migrate_lock;
        gfp_t flags;    /* allocation flags used when growing pool */
        atomic_long_t pages_allocated;
 };
@@ -618,6 +619,24 @@ static unsigned long handle_to_obj(struct zs_pool *pool, unsigned long handle)
        return obj;
 }
 
+static unsigned long obj_to_handle(struct zs_pool *pool,
+                               struct size_class *class, unsigned long obj)
+{
+       struct page *page;
+       unsigned long obj_idx, off;
+       unsigned long handle;
+       void *addr;
+
+       obj_to_location(obj, &page, &obj_idx);
+       off = obj_idx_to_offset(page, obj_idx, class->size);
+
+       addr = kmap_atomic(page);
+       handle = *(unsigned long *)(addr + off);
+       kunmap_atomic(addr);
+
+       return handle;
+}
+
 static unsigned long alloc_handle(struct zs_pool *pool)
 {
        unsigned long handle;
@@ -1066,6 +1085,8 @@ struct zs_pool *zs_create_pool(gfp_t flags)
        if (!pool)
                return NULL;
 
+       rwlock_init(&pool->migrate_lock);
+
        if (create_handle_class(pool, ZS_HANDLE_SIZE))
                goto err;
 
@@ -1157,20 +1178,41 @@ void zs_destroy_pool(struct zs_pool *pool)
 }
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
-static unsigned long __zs_malloc(struct zs_pool *pool,
-               struct size_class *class, gfp_t flags, unsigned long handle)
+static unsigned long __obj_malloc(struct page *first_page,
+               struct size_class *class, unsigned long handle)
 {
        unsigned long obj;
        struct link_free *link;
-       struct page *first_page, *m_page;
+       struct page *m_page;
        unsigned long m_objidx, m_offset;
        void *vaddr;
 
+       obj = (unsigned long)first_page->freelist;
+       obj_to_location(obj, &m_page, &m_objidx);
+       m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
+
+       vaddr = kmap_atomic(m_page);
+       link = (struct link_free *)vaddr + m_offset / sizeof(*link);
+       first_page->freelist = link->next;
+       link->handle = handle;
+       kunmap_atomic(vaddr);
+
+       first_page->inuse++;
+       return obj;
+}
+
+static unsigned long __zs_malloc(struct zs_pool *pool,
+               struct size_class *class, gfp_t flags, unsigned long handle)
+{
+       struct page *first_page;
+       unsigned long obj;
+
        spin_lock(&class->lock);
        first_page = find_get_zspage(class);
 
        if (!first_page) {
                spin_unlock(&class->lock);
+               read_unlock(&pool->migrate_lock);
                first_page = alloc_zspage(class, flags);
                if (unlikely(!first_page))
                        return 0;
@@ -1178,21 +1220,11 @@ static unsigned long __zs_malloc(struct zs_pool *pool,
                set_zspage_mapping(first_page, class->index, ZS_EMPTY);
                atomic_long_add(class->pages_per_zspage,
                                        &pool->pages_allocated);
+               read_lock(&pool->migrate_lock);
                spin_lock(&class->lock);
        }
 
-       obj = (unsigned long)first_page->freelist;
-       obj_to_location(obj, &m_page, &m_objidx);
-       m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
-
-       vaddr = kmap_atomic(m_page);
-       link = (struct link_free *)vaddr + m_offset / sizeof(*link);
-       first_page->freelist = link->next;
-       link->handle = handle;
-       kunmap_atomic(vaddr);
-
-       first_page->inuse++;
-
+       obj = __obj_malloc(first_page, class, handle);
        if (handle) {
                unsigned long *h_addr;
 
@@ -1225,6 +1257,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
        if (unlikely(!size || (size + ZS_HANDLE_SIZE) > ZS_MAX_ALLOC_SIZE))
                return 0;
 
+       read_lock(&pool->migrate_lock);
        /* allocate handle */
        handle = alloc_handle(pool);
        if (!handle)
@@ -1240,6 +1273,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
                goto out;
        }
 out:
+       read_unlock(&pool->migrate_lock);
        return handle;
 }
 EXPORT_SYMBOL_GPL(zs_malloc);
@@ -1299,6 +1333,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
        if (unlikely(!handle))
                return;
 
+       read_lock(&pool->migrate_lock);
        obj = handle_to_obj(pool, handle);
        /* free handle */
        free_handle(pool, handle);
@@ -1311,6 +1346,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
        class = pool->size_class[class_idx];
 
        __zs_free(pool, class, obj);
+       read_unlock(&pool->migrate_lock);
 }
 EXPORT_SYMBOL_GPL(zs_free);
 
@@ -1343,6 +1379,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 
        BUG_ON(!handle);
 
+       read_lock(&pool->migrate_lock);
        /*
         * Because we use per-cpu mapping areas shared among the
         * pools/users, we can't allow mapping in interrupt context
@@ -1405,6 +1442,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
                __zs_unmap_object(area, pages, off, class->size);
        }
        put_cpu_var(zs_map_area);
+       read_unlock(&pool->migrate_lock);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1414,6 +1452,282 @@ unsigned long zs_get_total_pages(struct zs_pool *pool)
 }
 EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
+static void zs_object_copy(unsigned long src, unsigned long dst,
+                               struct size_class *class)
+{
+       struct page *s_page, *d_page;
+       unsigned long s_objidx, d_objidx;
+       unsigned long s_off, d_off;
+       void *s_addr, *d_addr;
+       int s_size, d_size, size;
+       int written = 0;
+
+       s_size = d_size = class->size;
+
+       obj_to_location(src, &s_page, &s_objidx);
+       obj_to_location(dst, &d_page, &d_objidx);
+
+       s_off = obj_idx_to_offset(s_page, s_objidx, class->size);
+       d_off = obj_idx_to_offset(d_page, d_objidx, class->size);
+
+       if (s_off + class->size > PAGE_SIZE)
+               s_size = PAGE_SIZE - s_off;
+
+       if (d_off + class->size > PAGE_SIZE)
+               d_size = PAGE_SIZE - d_off;
+
+       s_addr = kmap_atomic(s_page);
+       d_addr = kmap_atomic(d_page);
+
+       while (1) {
+               size = min(s_size, d_size);
+               memcpy(d_addr + d_off, s_addr + s_off, size);
+               written += size;
+
+               if (written == class->size)
+                       break;
+
+               if (s_off + size >= PAGE_SIZE) {
+                       kunmap_atomic(s_addr);
+                       s_page = get_next_page(s_page);
+                       BUG_ON(!s_page);
+                       s_addr = kmap_atomic(s_page);
+                       s_size = class->size - written;
+                       s_off = 0;
+               } else {
+                       s_off += size;
+                       s_size -= size;
+               }
+
+               if (d_off + size >= PAGE_SIZE) {
+                       kunmap_atomic(d_addr);
+                       d_page = get_next_page(d_page);
+                       BUG_ON(!d_page);
+                       d_addr = kmap_atomic(d_page);
+                       d_size = class->size - written;
+                       d_off = 0;
+               } else {
+                       d_off += size;
+                       d_size -= size;
+               }
+       }
+
+       kunmap_atomic(s_addr);
+       kunmap_atomic(d_addr);
+}
+
+static unsigned long find_alloced_obj(struct page *page, int index,
+                                       struct size_class *class)
+{
+       int offset = 0;
+       unsigned long obj = 0;
+       void *addr = kmap_atomic(page);
+
+       if (!is_first_page(page))
+               offset = page->index;
+       offset += class->size * index;
+
+       while (offset < PAGE_SIZE) {
+               if (*(unsigned long *)(addr + offset) & OBJ_ALLOCATED) {
+                       obj = (unsigned long)obj_location_to_handle(page,
+                                                               index);
+                       break;
+               }
+
+               offset += class->size;
+               index++;
+       }
+
+       kunmap_atomic(addr);
+       return obj;
+}
+
+struct zs_compact_control {
+       struct page *s_page; /* from page for migration */
+       int index; /* start index from @s_page for finding used object */
+       struct page *d_page; /* to page for migration */
+       unsigned long nr_migrated;
+       int nr_to_migrate;
+};
+
+static void migrate_zspage(struct zs_pool *pool, struct zs_compact_control *cc,
+                               struct size_class *class)
+{
+       unsigned long used_obj, free_obj;
+       unsigned long handle;
+       struct page *s_page = cc->s_page;
+       unsigned long index = cc->index;
+       struct page *d_page = cc->d_page;
+       unsigned long *h_addr;
+       bool exit = false;
+
+       BUG_ON(!is_first_page(d_page));
+
+       while (1) {
+               used_obj = find_alloced_obj(s_page, index, class);
+               if (!used_obj) {
+                       s_page = get_next_page(s_page);
+                       if (!s_page)
+                               break;
+                       index = 0;
+                       continue;
+               }
+
+               if (d_page->inuse == d_page->objects)
+                       break;
+
+               free_obj = __obj_malloc(d_page, class, 0);
+
+               zs_object_copy(used_obj, free_obj, class);
+
+               obj_to_location(used_obj, &s_page, &index);
+               index++;
+
+               handle = obj_to_handle(pool, class, used_obj);
+               h_addr = handle_to_addr(pool, handle);
+               BUG_ON(*h_addr != used_obj);
+               *h_addr = free_obj;
+               cc->nr_migrated++;
+
+               /* Don't need a class->lock due to migrate_lock */
+               insert_zspage(get_first_page(s_page), class, ZS_ALMOST_EMPTY);
+
+               /*
+                * We don't want __zs_free to return a value just for the
+                * slow path that frees the zspage, so check page->inuse
+                * right before __zs_free and exit if this was the last object.
+                */
+               if (get_first_page(s_page)->inuse == 1)
+                       exit = true;
+
+               __zs_free(pool, class, used_obj);
+               if (exit)
+                       break;
+
+               remove_zspage(get_first_page(s_page), class, ZS_ALMOST_EMPTY);
+       }
+
+       cc->s_page = s_page;
+       cc->index = index;
+}
+
+static struct page *alloc_target_page(struct size_class *class)
+{
+       int i;
+       struct page *page;
+
+       spin_lock(&class->lock);
+       for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
+               page = class->fullness_list[i];
+               if (page) {
+                       remove_zspage(page, class, i);
+                       break;
+               }
+       }
+       spin_unlock(&class->lock);
+
+       return page;
+}
+
+static void putback_target_page(struct page *page, struct size_class *class)
+{
+       int class_idx;
+       enum fullness_group currfg;
+
+       BUG_ON(!is_first_page(page));
+
+       spin_lock(&class->lock);
+       get_zspage_mapping(page, &class_idx, &currfg);
+       insert_zspage(page, class, currfg);
+       fix_fullness_group(class, page);
+       spin_unlock(&class->lock);
+}
+
+static struct page *isolate_source_page(struct size_class *class)
+{
+       struct page *page;
+
+       spin_lock(&class->lock);
+       page = class->fullness_list[ZS_ALMOST_EMPTY];
+       if (page)
+               remove_zspage(page, class, ZS_ALMOST_EMPTY);
+       spin_unlock(&class->lock);
+
+       return page;
+}
+
+static void putback_source_page(struct page *page, struct size_class *class)
+{
+       spin_lock(&class->lock);
+       insert_zspage(page, class, ZS_ALMOST_EMPTY);
+       fix_fullness_group(class, page);
+       spin_unlock(&class->lock);
+}
+
+static unsigned long __zs_compact(struct zs_pool *pool,
+                               struct size_class *class)
+{
+       unsigned long nr_total_migrated = 0;
+       struct page *src_page, *dst_page;
+
+       write_lock(&pool->migrate_lock);
+       while ((src_page = isolate_source_page(class))) {
+               struct zs_compact_control cc;
+
+               BUG_ON(!is_first_page(src_page));
+
+               cc.index = 0;
+               cc.s_page = src_page;
+               cc.nr_to_migrate = src_page->inuse;
+               cc.nr_migrated = 0;
+
+               BUG_ON(0 >= cc.nr_to_migrate);
+retry:
+               dst_page = alloc_target_page(class);
+               if (!dst_page)
+                       break;
+               cc.d_page = dst_page;
+
+               migrate_zspage(pool, &cc, class);
+               putback_target_page(cc.d_page, class);
+
+               if (cc.nr_migrated < cc.nr_to_migrate)
+                       goto retry;
+
+               write_unlock(&pool->migrate_lock);
+               write_lock(&pool->migrate_lock);
+               nr_total_migrated += cc.nr_migrated;
+       }
+
+       if (src_page)
+               putback_source_page(src_page, class);
+
+       write_unlock(&pool->migrate_lock);
+
+       return nr_total_migrated;
+}
+
+unsigned long zs_compact(struct zs_pool *pool)
+{
+       int i;
+       unsigned long nr_migrated = 0;
+
+       for (i = 0; i < zs_size_classes; i++) {
+               struct size_class *class = pool->size_class[i];
+
+               if (!class)
+                       continue;
+
+               if (class->index != i)
+                       continue;
+
+               nr_migrated += __zs_compact(pool, class);
+       }
+
+       return nr_migrated;
+}
+EXPORT_SYMBOL_GPL(zs_compact);
+
 module_init(zs_init);
 module_exit(zs_exit);
 
-- 
2.0.0
