Hi,

I'm working on a bit of code that vmaps/vunmaps a bit more often than
I'd like. I've implemented a frontend to cache commonly used mappings,
which solves most of the problem, but while looking at various other ways
to get the last bit of performance, I thought it might be generally helpful
to batch up vunmap-driven TLB flushes. So I'll just throw the idea
out there (is anyone else doing a lot of vmapping? I'd like to hear
from you!).

Anyway, the idea is just that we don't free up the virtual address space
immediately, but instead wait until we've collected a batch of regions,
then free them all at once and flush the TLBs only once per batch.

We are able to free the pages themselves at vfree-time, because although
stale TLB entries may still point to them, it would be a kernel bug for
anything to access the unmapped region through those entries at this stage
(AFAIKS, we still do need to flush the cache at vunmap-time, however).

And we are able to flush at vmap-time if we run out of virtual area.

So the cost is pretty small -- with 128 deferred regions sitting there,
it's maybe 8K worth of struct vm_structs (128 of them at ~64 bytes apiece).

Here is a rough hack. Comments?

--

Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -24,8 +24,13 @@
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
+#define LAZY_MAX 128
+static unsigned long lazy_start = -1UL, lazy_end = 0;
+static unsigned int lazy_nr;
+
 static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
                            int node);
+static void __purge_vm_area_lazy(void);
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 {
@@ -68,23 +73,33 @@ static inline void vunmap_pud_range(pgd_
        } while (pud++, addr = next, addr != end);
 }
 
-void unmap_kernel_range(unsigned long addr, unsigned long size)
+/*
+ * This function does not flush pagetables itself.
+ */
+static void __unmap_kernel_range(unsigned long addr, unsigned long end)
 {
        pgd_t *pgd;
        unsigned long next;
-       unsigned long start = addr;
-       unsigned long end = addr + size;
 
        BUG_ON(addr >= end);
        pgd = pgd_offset_k(addr);
-       flush_cache_vunmap(addr, end);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                vunmap_pud_range(pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
-       flush_tlb_kernel_range(start, end);
+}
+
+void unmap_kernel_range(unsigned long addr, unsigned long size)
+{
+       unsigned long end = addr + size;
+
+       BUG_ON(addr >= end);
+
+       flush_cache_vunmap(addr, end);
+       __unmap_kernel_range(addr, end);
+       flush_tlb_kernel_range(addr, end);
 }
 
 static void unmap_vm_area(struct vm_struct *area)
@@ -200,6 +215,7 @@ static struct vm_struct *__get_vm_area_n
        size += PAGE_SIZE;
 
        write_lock(&vmlist_lock);
+retry:
        for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
                if ((unsigned long)tmp->addr < addr) {
                        if((unsigned long)tmp->addr + tmp->size >= addr)
@@ -215,7 +231,7 @@ static struct vm_struct *__get_vm_area_n
                if (addr > end - size)
                        goto out;
        }
-
+       /* XXX: should have addr > end - size check here */
 found:
        area->next = *p;
        *p = area;
@@ -231,6 +247,11 @@ found:
        return area;
 
 out:
+       if (lazy_nr) {
+               __purge_vm_area_lazy();
+               addr = ALIGN(start, align);
+               goto retry;
+       }
        write_unlock(&vmlist_lock);
        kfree(area);
        if (printk_ratelimit())
@@ -291,13 +312,64 @@ static struct vm_struct *__remove_vm_are
        return NULL;
 
 found:
+       BUG_ON(tmp->flags & VM_LAZYFREE);
        unmap_vm_area(tmp);
        *p = tmp->next;
 
-       /*
-        * Remove the guard page.
-        */
-       tmp->size -= PAGE_SIZE;
+       return tmp;
+}
+
+static void __purge_vm_area_lazy(void)
+{
+       struct vm_struct **p, *tmp;
+
+       p = &vmlist;
+       while ((tmp = *p) != NULL) {
+               if (tmp->flags & VM_LAZYFREE) {
+                       unsigned long start = (unsigned long)tmp->addr;
+                       unsigned long end = start + tmp->size;
+
+                       BUG_ON(start < lazy_start);
+                       BUG_ON(end > lazy_end);
+
+                       *p = tmp->next;
+                       __unmap_kernel_range(start, end);
+                       kfree(tmp);
+                       lazy_nr--;
+               } else
+                       p = &tmp->next;
+       }
+       flush_tlb_kernel_range(lazy_start, lazy_end);
+       BUG_ON(lazy_nr != 0);
+
+       lazy_end = 0;
+       lazy_start = -1UL;
+}
+
+static struct vm_struct *__remove_vm_area_lazy(void *addr)
+{
+       struct vm_struct *tmp;
+
+       tmp = __find_vm_area(addr);
+       if (tmp) {
+               unsigned long start, end;
+
+               if (tmp->flags & VM_LAZYFREE)
+                       return NULL; /* shouldn't happen */
+
+               start = (unsigned long)tmp->addr;
+               end = start + tmp->size;
+
+               flush_cache_vunmap(start, end);
+
+               tmp->flags |= VM_LAZYFREE;
+               if (start < lazy_start)
+                       lazy_start = start;
+               if (end > lazy_end)
+                       lazy_end = end;
+               lazy_nr++;
+       }
+
        return tmp;
 }
 
@@ -321,6 +393,8 @@ struct vm_struct *remove_vm_area(void *a
 static void __vunmap(void *addr, int deallocate_pages)
 {
        struct vm_struct *area;
+       struct page **pages;
+       int nrpages, vpages;
 
        if (!addr)
                return;
@@ -331,32 +405,40 @@ static void __vunmap(void *addr, int dea
                return;
        }
 
-       area = remove_vm_area(addr);
+       write_lock(&vmlist_lock);
+       area = __remove_vm_area_lazy(addr);
        if (unlikely(!area)) {
+               write_unlock(&vmlist_lock);
                printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
                                addr);
                WARN_ON(1);
                return;
        }
 
-       debug_check_no_locks_freed(addr, area->size);
+       debug_check_no_locks_freed(addr, area->size - PAGE_SIZE);
+
+       pages = area->pages;
+       nrpages = area->nr_pages;
+       vpages = area->flags & VM_VPAGES;
+
+       if (lazy_nr > LAZY_MAX)
+               __purge_vm_area_lazy();
+
+       write_unlock(&vmlist_lock);
 
        if (deallocate_pages) {
                int i;
 
-               for (i = 0; i < area->nr_pages; i++) {
-                       BUG_ON(!area->pages[i]);
-                       __free_page(area->pages[i]);
+               for (i = 0; i < nrpages; i++) {
+                       BUG_ON(!pages[i]);
+                       __free_page(pages[i]);
                }
 
-               if (area->flags & VM_VPAGES)
-                       vfree(area->pages);
+               if (vpages)
+                       vfree(pages);
                else
-                       kfree(area->pages);
+                       kfree(pages);
        }
-
-       kfree(area);
-       return;
 }
 
 /**
Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -12,6 +12,7 @@ struct vm_area_struct;
 #define VM_MAP         0x00000004      /* vmap()ed pages */
 #define VM_USERMAP     0x00000008      /* suitable for remap_vmalloc_range */
 #define VM_VPAGES      0x00000010      /* buffer for pages was vmalloc'ed */
+#define VM_LAZYFREE    0x00000020      /* area is unmapped lazily */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*