From: Mike Rapoport <r...@linux.ibm.com>

Removing a PAGE_SIZE page from the direct map every time such a page is
allocated for a secret memory mapping will cause severe fragmentation of
the direct map. This fragmentation can be reduced by using PMD-size pages
as a pool from which small pages are handed out to secret memory mappings.

Add a gen_pool per secretmem inode and lazily populate this pool with
PMD-size pages.

Signed-off-by: Mike Rapoport <r...@linux.ibm.com>
---
 mm/secretmem.c | 107 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 19 deletions(-)
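
Not part of the patch: below is a condensed sketch of the genalloc pattern
the change relies on, for reviewers less familiar with lib/genalloc.c. The
demo_* names are illustrative only, PMD_PAGE_ORDER is introduced earlier in
the series, and the direct map manipulation (__kernel_map_pages) and the
page refcounting done by the real code are left out.

#include <linux/genalloc.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/mm.h>

static struct gen_pool *demo_pool;

/* Lazily back the pool with one PMD-size chunk split into base pages. */
static int demo_pool_refill(gfp_t gfp)
{
        struct page *page = alloc_pages(gfp, PMD_PAGE_ORDER);

        if (!page)
                return -ENOMEM;

        split_page(page, PMD_PAGE_ORDER);

        /* (the real code also frees the pages when gen_pool_add() fails) */
        return gen_pool_add(demo_pool, (unsigned long)page_address(page),
                            PMD_SIZE, NUMA_NO_NODE);
}

static struct page *demo_alloc_page(gfp_t gfp)
{
        unsigned long addr;

        if (gen_pool_avail(demo_pool) < PAGE_SIZE && demo_pool_refill(gfp))
                return NULL;

        addr = gen_pool_alloc(demo_pool, PAGE_SIZE);
        return addr ? virt_to_page(addr) : NULL;
}

static void demo_free_page(struct page *page)
{
        /* pages go back to the pool, not to the page allocator */
        gen_pool_free(demo_pool, (unsigned long)page_address(page), PAGE_SIZE);
}

static int __init demo_init(void)
{
        /* PAGE_SHIFT granularity: the smallest allocation unit is one page */
        demo_pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
        return demo_pool ? 0 : -ENOMEM;
}

The point to note is that freed pages return to the per-inode pool rather
than to the page allocator; chunks are only released (and restored in the
direct map) when the inode is evicted.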

diff --git a/mm/secretmem.c b/mm/secretmem.c
index 3293f761076e..333eb18fb483 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -12,6 +12,7 @@
 #include <linux/bitops.h>
 #include <linux/printk.h>
 #include <linux/pagemap.h>
+#include <linux/genalloc.h>
 #include <linux/syscalls.h>
 #include <linux/pseudo_fs.h>
 #include <linux/set_memory.h>
@@ -40,24 +41,66 @@
 #define SECRETMEM_FLAGS_MASK   SECRETMEM_MODE_MASK
 
 struct secretmem_ctx {
+       struct gen_pool *pool;
        unsigned int mode;
 };
 
-static struct page *secretmem_alloc_page(gfp_t gfp)
+static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 {
-       /*
-        * FIXME: use a cache of large pages to reduce the direct map
-        * fragmentation
-        */
-       return alloc_page(gfp);
+       unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
+       struct gen_pool *pool = ctx->pool;
+       unsigned long addr;
+       struct page *page;
+       int err;
+
+       page = alloc_pages(gfp, PMD_PAGE_ORDER);
+       if (!page)
+               return -ENOMEM;
+
+       addr = (unsigned long)page_address(page);
+       split_page(page, PMD_PAGE_ORDER);
+
+       err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
+       if (err) {
+               __free_pages(page, PMD_PAGE_ORDER);
+               return err;
+       }
+
+       __kernel_map_pages(page, nr_pages, 0);
+
+       return 0;
+}
+
+static struct page *secretmem_alloc_page(struct secretmem_ctx *ctx,
+                                        gfp_t gfp)
+{
+       struct gen_pool *pool = ctx->pool;
+       unsigned long addr;
+       struct page *page;
+       int err;
+
+       if (gen_pool_avail(pool) < PAGE_SIZE) {
+               err = secretmem_pool_increase(ctx, gfp);
+               if (err)
+                       return NULL;
+       }
+
+       addr = gen_pool_alloc(pool, PAGE_SIZE);
+       if (!addr)
+               return NULL;
+
+       page = virt_to_page(addr);
+       get_page(page);
+
+       return page;
 }
 
 static vm_fault_t secretmem_fault(struct vm_fault *vmf)
 {
+       struct secretmem_ctx *ctx = vmf->vma->vm_file->private_data;
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        struct inode *inode = file_inode(vmf->vma->vm_file);
        pgoff_t offset = vmf->pgoff;
-       unsigned long addr;
        struct page *page;
        int ret = 0;
 
@@ -66,7 +109,7 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
 
        page = find_get_entry(mapping, offset);
        if (!page) {
-               page = secretmem_alloc_page(vmf->gfp_mask);
+               page = secretmem_alloc_page(ctx, vmf->gfp_mask);
                if (!page)
                        return vmf_error(-ENOMEM);
 
@@ -74,14 +117,8 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
                if (unlikely(ret))
                        goto err_put_page;
 
-               ret = set_direct_map_invalid_noflush(page);
-               if (ret)
-                       goto err_del_page_cache;
-
-               addr = (unsigned long)page_address(page);
-               flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-
                __SetPageUptodate(page);
+               set_page_private(page, (unsigned long)ctx);
 
                ret = VM_FAULT_LOCKED;
        }
@@ -89,8 +126,6 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
        vmf->page = page;
        return ret;
 
-err_del_page_cache:
-       delete_from_page_cache(page);
 err_put_page:
        put_page(page);
        return vmf_error(ret);
@@ -138,7 +173,11 @@ static int secretmem_migratepage(struct address_space *mapping,
 
 static void secretmem_freepage(struct page *page)
 {
-       set_direct_map_default_noflush(page);
+       unsigned long addr = (unsigned long)page_address(page);
+       struct secretmem_ctx *ctx = (struct secretmem_ctx *)page_private(page);
+       struct gen_pool *pool = ctx->pool;
+
+       gen_pool_free(pool, addr, PAGE_SIZE);
 }
 
 static const struct address_space_operations secretmem_aops = {
@@ -163,13 +202,18 @@ static struct file *secretmem_file_create(unsigned long flags)
        if (!ctx)
                goto err_free_inode;
 
+       ctx->pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
+       if (!ctx->pool)
+               goto err_free_ctx;
+
        file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
                                 O_RDWR, &secretmem_fops);
        if (IS_ERR(file))
-               goto err_free_ctx;
+               goto err_free_pool;
 
        mapping_set_unevictable(inode->i_mapping);
 
+       inode->i_private = ctx;
        inode->i_mapping->private_data = ctx;
        inode->i_mapping->a_ops = &secretmem_aops;
 
@@ -183,6 +227,8 @@ static struct file *secretmem_file_create(unsigned long flags)
 
        return file;
 
+err_free_pool:
+       gen_pool_destroy(ctx->pool);
 err_free_ctx:
        kfree(ctx);
 err_free_inode:
@@ -221,11 +267,34 @@ SYSCALL_DEFINE1(memfd_secret, unsigned long, flags)
        return err;
 }
 
+static void secretmem_cleanup_chunk(struct gen_pool *pool,
+                                   struct gen_pool_chunk *chunk, void *data)
+{
+       unsigned long start = chunk->start_addr;
+       unsigned long end = chunk->end_addr;
+       unsigned long nr_pages, addr;
+
+       nr_pages = (end - start + 1) / PAGE_SIZE;
+       __kernel_map_pages(virt_to_page(start), nr_pages, 1);
+
+       for (addr = start; addr < end; addr += PAGE_SIZE)
+               put_page(virt_to_page(addr));
+}
+
+static void secretmem_cleanup_pool(struct secretmem_ctx *ctx)
+{
+       struct gen_pool *pool = ctx->pool;
+
+       gen_pool_for_each_chunk(pool, secretmem_cleanup_chunk, ctx);
+       gen_pool_destroy(pool);
+}
+
 static void secretmem_evict_inode(struct inode *inode)
 {
        struct secretmem_ctx *ctx = inode->i_private;
 
        truncate_inode_pages_final(&inode->i_data);
+       secretmem_cleanup_pool(ctx);
        clear_inode(inode);
        kfree(ctx);
 }
-- 
2.28.0
