We don't support huge pages when a page is moved from the page cache to
swap. So in this function, we enable huge page handling in two cases:

1) when a huge page is found in the page cache,
2) or when we need to allocate a huge page for the page cache

We have to refactor all the calls to shmem_getpage to simplify the job
of the caller. Right now shmem_getpage does:

1) simply request a page, defaulting to a small page
2) or the caller specifies a flag to request either a huge page or a small
page, then shmem_getpage leaves the caller to decide how to use it

Signed-off-by: Ning Qu <[email protected]>
---
 mm/shmem.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 108 insertions(+), 31 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 447bd14..8fe17dd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -115,15 +115,43 @@ static unsigned long shmem_default_max_inodes(void)
 static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
                                struct shmem_inode_info *info, pgoff_t index);
+
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int flags,
+       int *fault_type);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int flags,
+       int *fault_type)
+{
+       int ret = 0;
+       struct page *page = NULL;
 
+       if ((flags & AOP_FLAG_TRANSHUGE) &&
+           mapping_can_have_hugepages(inode->i_mapping)) {
+               ret = shmem_getpage_gfp(inode, index & ~HPAGE_CACHE_INDEX_MASK,
+                                       &page, sgp, gfp, flags,
+                                       NULL);
+               BUG_ON(page && !PageTransHugeCache(page));
+       }
+
+       if (!page) {
+               ret = shmem_getpage_gfp(inode, index, &page, sgp, gfp,
+                                       0, NULL);
+       }
+
+       *pagep = page;
+       return ret;
+}
+#else
 static inline int shmem_getpage(struct inode *inode, pgoff_t index,
-       struct page **pagep, enum sgp_type sgp, int *fault_type)
+       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int flags,
+       int *fault_type)
 {
-       return shmem_getpage_gfp(inode, index, pagep, sgp,
-                       mapping_gfp_mask(inode->i_mapping), fault_type);
+       return shmem_getpage_gfp(inode, index, pagep, sgp, gfp, 0, NULL);
 }
+#endif
 
 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
 {
@@ -585,7 +613,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
        if (partial_start) {
                struct page *page = NULL;
-               shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+               gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+
+               shmem_getpage(inode, start - 1, &page, SGP_READ, gfp, 0, NULL);
                if (page) {
                        unsigned int top = PAGE_CACHE_SIZE;
                        if (start > end) {
@@ -600,7 +630,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
        }
        if (partial_end) {
                struct page *page = NULL;
-               shmem_getpage(inode, end, &page, SGP_READ, NULL);
+               gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+
+               shmem_getpage(inode, end, &page, SGP_READ, gfp, 0, NULL);
                if (page) {
                        zero_user_segment(page, 0, partial_end);
                        set_page_dirty(page);
@@ -1175,7 +1207,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  * entry since a page cannot live in both the swap and page cache
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
+       struct page **pagep, enum sgp_type sgp, gfp_t gfp, int flags,
+       int *fault_type)
 {
        struct address_space *mapping = inode->i_mapping;
        struct shmem_inode_info *info;
@@ -1185,6 +1218,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
        int error;
        int once = 0;
        int alloced = 0;
+       bool must_use_thp = flags & AOP_FLAG_TRANSHUGE;
+       int nr = 1;
 
        if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
                return -EFBIG;
@@ -1194,6 +1229,11 @@ repeat:
        if (radix_tree_exceptional_entry(page)) {
                swap = radix_to_swp_entry(page);
                page = NULL;
+               /* in swap, it's not a huge page for sure */
+               if (must_use_thp) {
+                       *pagep = NULL;
+                       return 0;
+               }
        }
 
        if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
@@ -1210,6 +1250,16 @@ repeat:
                page_cache_release(page);
                page = NULL;
        }
+
+       if (page) {
+               if (must_use_thp && !PageTransHugeCache(page)) {
+                       unlock_page(page);
+                       page_cache_release(page);
+                       *pagep = NULL;
+                       return 0;
+               }
+       }
+
        if (page || (sgp == SGP_READ && !swap.val)) {
                *pagep = page;
                return 0;
@@ -1298,27 +1348,37 @@ repeat:
                                error = -ENOSPC;
                                goto unacct;
                        }
-                       percpu_counter_inc(&sbinfo->used_blocks);
                }
 
-               page = shmem_alloc_page(gfp, info, index);
+               if (must_use_thp) {
+                       page = shmem_alloc_hugepage(gfp, info, index);
+                       if (page) {
+                               count_vm_event(THP_WRITE_ALLOC);
+                               nr = hpagecache_nr_pages(page);
+                       } else
+                               count_vm_event(THP_WRITE_ALLOC_FAILED);
+               } else {
+                       page = shmem_alloc_page(gfp, info, index);
+                       nr = 1;
+               }
+
                if (!page) {
                        error = -ENOMEM;
-                       goto decused;
+                       goto unacct;
                }
 
+               if (sbinfo->max_blocks)
+                       percpu_counter_add(&sbinfo->used_blocks, nr);
+
                SetPageSwapBacked(page);
                __set_page_locked(page);
                error = mem_cgroup_cache_charge(page, current->mm,
                                                gfp & GFP_RECLAIM_MASK);
                if (error)
                        goto decused;
-               error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
-               if (!error) {
-                       error = shmem_add_to_page_cache(page, mapping, index,
-                                                       gfp, NULL);
-                       radix_tree_preload_end();
-               }
+
+               error = shmem_add_to_page_cache(page, mapping, index,
+                                               gfp, NULL);
                if (error) {
                        mem_cgroup_uncharge_cache_page(page);
                        goto decused;
@@ -1326,8 +1386,8 @@ repeat:
                lru_cache_add_anon(page);
 
                spin_lock(&info->lock);
-               info->alloced++;
-               inode->i_blocks += BLOCKS_PER_PAGE;
+               info->alloced += nr;
+               inode->i_blocks += BLOCKS_PER_PAGE * nr;
                shmem_recalc_inode(inode);
                spin_unlock(&info->lock);
                alloced = true;
@@ -1344,7 +1404,7 @@ clear:
                 * it now, lest undo on failure cancel our earlier guarantee.
                 */
                if (sgp != SGP_WRITE) {
-                       clear_highpage(page);
+                       clear_pagecache_page(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                }
@@ -1378,7 +1438,7 @@ trunc:
 decused:
        sbinfo = SHMEM_SB(inode->i_sb);
        if (sbinfo->max_blocks)
-               percpu_counter_add(&sbinfo->used_blocks, -1);
+               percpu_counter_add(&sbinfo->used_blocks, -nr);
 unacct:
        shmem_unacct_blocks(info->flags, 1);
 failed:
@@ -1407,8 +1467,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct inode *inode = file_inode(vma->vm_file);
        int error;
        int ret = VM_FAULT_LOCKED;
+       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
 
-       error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
+       error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, gfp,
+                               0, &ret);
        if (error)
                return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
 
@@ -1544,7 +1606,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 {
        struct inode *inode = mapping->host;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-       return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+
+       return shmem_getpage(inode, index, pagep, SGP_WRITE, gfp, 0, NULL);
 }
 
 static int
@@ -1575,6 +1639,7 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
 {
        struct inode *inode = file_inode(filp);
+       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
        struct address_space *mapping = inode->i_mapping;
        pgoff_t index;
        unsigned long offset;
@@ -1606,7 +1671,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                                break;
                }
 
-               desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
+               desc->error = shmem_getpage(inode, index, &page, sgp, gfp,
+                                               0, NULL);
                if (desc->error) {
                        if (desc->error == -EINVAL)
                                desc->error = 0;
@@ -1716,6 +1782,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 {
        struct address_space *mapping = in->f_mapping;
        struct inode *inode = mapping->host;
+       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
        unsigned int loff, nr_pages, req_pages;
        struct page *pages[PIPE_DEF_BUFFERS];
        struct partial_page partial[PIPE_DEF_BUFFERS];
@@ -1754,7 +1821,8 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
        error = 0;
 
        while (spd.nr_pages < nr_pages) {
-               error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+               error = shmem_getpage(inode, index, &page, SGP_CACHE, gfp,
+                                       0, NULL);
                if (error)
                        break;
                unlock_page(page);
@@ -1776,8 +1844,8 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                page = spd.pages[page_nr];
 
                if (!PageUptodate(page) || page->mapping != mapping) {
-                       error = shmem_getpage(inode, index, &page,
-                                                       SGP_CACHE, NULL);
+                       error = shmem_getpage(inode, index, &page, SGP_CACHE,
+                                       gfp, 0, NULL);
                        if (error)
                                break;
                        unlock_page(page);
@@ -1969,9 +2037,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                        error = -EINTR;
                else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
                        error = -ENOMEM;
-               else
+               else {
+                       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
                        error = shmem_getpage(inode, index, &page, SGP_FALLOC,
-                                                                       NULL);
+                                             gfp, 0, NULL);
+               }
                if (error) {
                        /* Remove the !PageUptodate pages we added */
                        shmem_undo_range(inode,
@@ -2237,7 +2307,10 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
                }
                inode->i_op = &shmem_short_symlink_operations;
        } else {
-               error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
+               gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+
+               error = shmem_getpage(inode, 0, &page, SGP_WRITE, gfp,
+                                       0, NULL);
                if (error) {
                        iput(inode);
                        return error;
@@ -2267,8 +2340,12 @@ static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata
 
 static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
+       struct inode *inode = dentry->d_inode;
+       gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
        struct page *page = NULL;
-       int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
+       int error;
+
+       error = shmem_getpage(inode, 0, &page, SGP_READ, gfp, 0, NULL);
        nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
        if (page)
                unlock_page(page);
@@ -3131,7 +3208,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
        int error;
 
        BUG_ON(mapping->a_ops != &shmem_aops);
-       error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+       error = shmem_getpage(inode, index, &page, SGP_CACHE, gfp, 0, NULL);
        if (error)
                page = ERR_PTR(error);
        else
-- 
1.8.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to