Modify truncate_hugepages() to take a range of pages (start, end)
instead of simply start.  If the value of end is -1, this indicates
the end of the range is the end of the file.  This functionality
will be used for fallocate hole punching.

Downstream of truncate_hugepages(), the routine hugetlb_unreserve_pages()
must also be modified to accept a range of pages.

A new region tracking/resv_map routine region_del() is added to delete
a range of regions within the reserve maps.  As in truncate_hugepages,
a range end value of -1 indicates all regions after the starting value
should be deleted.

Based-on code-by: Dave Hansen <[email protected]>
Signed-off-by: Mike Kravetz <[email protected]>
---
 fs/hugetlbfs/inode.c    | 31 +++++++++++++++-----
 include/linux/hugetlb.h |  3 +-
 mm/hugetlb.c            | 76 +++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca..2faf2c4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -324,19 +324,32 @@ static void truncate_huge_page(struct page *page)
        delete_from_page_cache(page);
 }
 
-static void truncate_hugepages(struct inode *inode, loff_t lstart)
+static void truncate_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
 {
        struct hstate *h = hstate_inode(inode);
        struct address_space *mapping = &inode->i_data;
        const pgoff_t start = lstart >> huge_page_shift(h);
+       const pgoff_t end = lend >> huge_page_shift(h);
        struct pagevec pvec;
        pgoff_t next;
        int i, freed = 0;
+       long lookup_nr = PAGEVEC_SIZE;
 
        pagevec_init(&pvec, 0);
        next = start;
-       while (1) {
-               if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+       while (next < end) {
+               /*
+                * Make sure to never grab more pages than we
+                * might possibly need.
+                */
+               if (end - next < lookup_nr)
+                       lookup_nr = end - next;
+
+               /*
+                * This pagevec_lookup() may return pages past 'end',
+                * so we must check for page->index >= end.
+                */
+               if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
                        if (next == start)
                                break;
                        next = start;
@@ -347,6 +360,11 @@ static void truncate_hugepages(struct inode *inode, loff_t 
lstart)
                        struct page *page = pvec.pages[i];
 
                        lock_page(page);
+                       if (page->index >= end) {
+                               unlock_page(page);
+                               next = end;     /* we are done */
+                               break;
+                       }
                        if (page->index > next)
                                next = page->index;
                        ++next;
@@ -356,15 +374,14 @@ static void truncate_hugepages(struct inode *inode, 
loff_t lstart)
                }
                huge_pagevec_release(&pvec);
        }
-       BUG_ON(!lstart && mapping->nrpages);
-       hugetlb_unreserve_pages(inode, start, freed);
+       hugetlb_unreserve_pages(inode, start, end, freed);
 }
 
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
        struct resv_map *resv_map;
 
-       truncate_hugepages(inode, 0);
+       truncate_hugepages(inode, 0, -1);
        resv_map = (struct resv_map *)inode->i_mapping->private_data;
        /* root inode doesn't have the resv_map, so we should check it */
        if (resv_map)
@@ -410,7 +427,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t 
offset)
        if (!RB_EMPTY_ROOT(&mapping->i_mmap))
                hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
        i_mmap_unlock_write(mapping);
-       truncate_hugepages(inode, offset);
+       truncate_hugepages(inode, offset, -1);
        return 0;
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7b57850..de39705 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -75,7 +75,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct 
*vma,
 int hugetlb_reserve_pages(struct inode *inode, long from, long to,
                                                struct vm_area_struct *vma,
                                                vm_flags_t vm_flags);
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+void hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+                                               long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c41b2a0..31e36cd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -257,6 +257,77 @@ out_nrg:
        return chg;
 }
 
+static long region_del(struct resv_map *resv, long f, long t)
+{
+       struct list_head *head = &resv->regions;
+       struct file_region *rg, *trg;
+       struct file_region *nrg = NULL;
+       long chg = 0;
+
+       /*
+        * Delete [f, t): split, remove or trim each overlapping region.
+        * Returns the total length removed, or -ENOMEM if a split entry
+        * cannot be allocated.  t == -1 selects all remaining regions;
+        * it becomes LONG_MAX because all comparisons are signed.
+        */
+       if (t == -1)
+               t = LONG_MAX;
+retry:
+       spin_lock(&resv->lock);
+       list_for_each_entry_safe(rg, trg, head, link) {
+               if (rg->to <= f)        /* region ends before the range */
+                       continue;
+               if (rg->from >= t)      /* region starts after the range */
+                       break;
+
+               if (f > rg->from && t < rg->to) { /* must split region */
+                       if (!nrg) {
+                               spin_unlock(&resv->lock); /* allocate unlocked */
+                               nrg = kmalloc(sizeof(*nrg),
+                                               GFP_KERNEL |  __GFP_REPEAT);
+                               if (!nrg) {
+                                       /* FIXME: allocation failure needs real handling */
+                                       return -ENOMEM;
+                               }
+                               goto retry;     /* rescan the list with nrg in hand */
+                       }
+
+                       chg += t - f;
+
+                       /* new entry covers the tail of the split region: [t, rg->to) */
+                       nrg->from = t;
+                       nrg->to = rg->to;
+                       INIT_LIST_HEAD(&nrg->link);
+
+                       /* original entry is trimmed to the head: [rg->from, f) */
+                       rg->to = f;
+
+                       list_add(&nrg->link, &rg->link);
+                       nrg = NULL;     /* now owned by the list; skip the kfree below */
+                       break;
+               }
+
+               if (f <= rg->from && t >= rg->to) { /* remove entire region */
+                       chg += rg->to - rg->from;
+                       list_del(&rg->link);
+                       kfree(rg);
+                       continue;
+               }
+
+               if (f <= rg->from) {    /* trim beginning of region */
+                       chg += t - rg->from;
+                       rg->from = t;
+               } else {                /* trim end of region */
+                       chg += rg->to - f;
+                       rg->to = f;
+               }
+       }
+
+       spin_unlock(&resv->lock);
+       kfree(nrg);     /* NULL here if it was consumed by a split */
+       return chg;
+}
+
 static long region_truncate(struct resv_map *resv, long end)
 {
        struct list_head *head = &resv->regions;
@@ -3510,7 +3581,8 @@ out_err:
        return ret;
 }
 
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
+void hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+                                                               long freed)
 {
        struct hstate *h = hstate_inode(inode);
        struct resv_map *resv_map = inode_resv_map(inode);
@@ -3518,7 +3590,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long 
offset, long freed)
        struct hugepage_subpool *spool = subpool_inode(inode);
 
        if (resv_map)
-               chg = region_truncate(resv_map, offset);
+               chg = region_del(resv_map, start, end);
        spin_lock(&inode->i_lock);
        inode->i_blocks -= (blocks_per_huge_page(h) * freed);
        spin_unlock(&inode->i_lock);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to