Hi,

after testing on ARM, here is the latest version of the nocow patch,
split into three parts: the generic (noarch) part, the x86-specific
patch and the ARM-specific patch.
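
For the record, the entry point is ipipe_disable_ondemand_mappings(),
which a real-time core is expected to call once a task commits to
running over the head domain, after the task has locked its memory. A
minimal kernel-side caller could look like the sketch below (the
function name and the error handling are illustrative only, not part
of the patch):

	#include <linux/sched.h>
	#include <linux/ipipe.h>

	/* Hypothetical caller: pin all of tsk's memory so that it can no
	 * longer take COW or on-demand mapping faults.  Assumes the task
	 * already issued mlockall(MCL_CURRENT | MCL_FUTURE). */
	static int example_enter_head_domain(struct task_struct *tsk)
	{
		int ret = ipipe_disable_ondemand_mappings(tsk);

		if (ret)
			printk(KERN_WARNING "could not pin mm of %s: %d\n",
			       tsk->comm, ret);
		return ret;
	}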
--
Gilles Chanteperdrix
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/include/linux/ipipe.h ipipe-2.6.19-arm-nocow/include/linux/ipipe.h
--- ipipe-2.6.19-arm/include/linux/ipipe.h 2007-01-15 21:33:00.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/include/linux/ipipe.h 2007-01-30 21:22:26.769349729 +0100
@@ -337,6 +337,15 @@ int fastcall __ipipe_dispatch_wired(stru
void fastcall __ipipe_sync_stage(unsigned long syncmask);
+int __ipipe_update_all_pinned_mm(unsigned long start, unsigned long end);
+
+struct mm_struct;
+
+void __ipipe_unlink_pinned_mm(struct mm_struct *mm);
+
+int __ipipe_pin_range_mapping(struct mm_struct *mm,
+			      unsigned long start, unsigned long end);
+
#ifndef __ipipe_sync_pipeline
#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask)
#endif
@@ -434,12 +443,11 @@ static inline void ipipe_init_notify(str
__ipipe_dispatch_event(IPIPE_EVENT_INIT,p);
}
-struct mm_struct;
-
static inline void ipipe_cleanup_notify(struct mm_struct *mm)
{
if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP))
__ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm);
+ __ipipe_unlink_pinned_mm(mm);
}
/* Public interface */
@@ -643,6 +651,8 @@ int fastcall ipipe_set_ptd(int key,
void fastcall *ipipe_get_ptd(int key);
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
+
#define local_irq_enable_hw_cond() local_irq_enable_hw()
#define local_irq_disable_hw_cond() local_irq_disable_hw()
#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags)
@@ -690,6 +700,7 @@ void fastcall *ipipe_get_ptd(int key);
#define ipipe_cleanup_notify(mm) do { } while(0)
#define ipipe_trap_notify(t,r) 0
#define ipipe_init_proc() do { } while(0)
+#define __ipipe_update_all_pinned_mm(start, end) 0
#define local_irq_enable_hw_cond() do { } while(0)
#define local_irq_disable_hw_cond() do { } while(0)
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/include/linux/mm.h ipipe-2.6.19-arm-nocow/include/linux/mm.h
--- ipipe-2.6.19-arm/include/linux/mm.h 2007-01-04 22:05:12.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/include/linux/mm.h 2007-01-30 21:22:26.769349729 +0100
@@ -166,6 +166,7 @@ extern unsigned int kobjsize(const void
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
+#define VM_PINNED 0x10000000 /* Eagerly mapped: COW and on-demand faults disabled */
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
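
Note that VM_PINNED alone does not suppress COW at fork time: the vma
(or the mm-wide def_flags) must carry VM_LOCKED as well. The patch
open-codes the test in copy_one_pte(); written as a helper, it would
amount to this illustrative sketch (not part of the patch):

	#include <linux/mm.h>

	/* A vma is copied eagerly at fork (no COW) only when both bits
	 * are set, either on the vma itself or mm-wide via def_flags. */
	static inline int example_vma_pinned(struct vm_area_struct *vma)
	{
		unsigned long flags = vma->vm_flags | vma->vm_mm->def_flags;

		return (flags & (VM_LOCKED | VM_PINNED))
			== (VM_LOCKED | VM_PINNED);
	}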
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/include/linux/sched.h ipipe-2.6.19-arm-nocow/include/linux/sched.h
--- ipipe-2.6.19-arm/include/linux/sched.h 2007-01-15 21:33:00.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/include/linux/sched.h 2007-01-30 21:22:26.770349605 +0100
@@ -363,6 +363,10 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
+
+#ifdef CONFIG_IPIPE
+	struct list_head pinned;
+#endif /* CONFIG_IPIPE */
};
struct sighand_struct {
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/lib/ioremap.c ipipe-2.6.19-arm-nocow/lib/ioremap.c
--- ipipe-2.6.19-arm/lib/ioremap.c 2007-01-15 21:33:01.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/lib/ioremap.c 2007-01-30 21:22:26.771349480 +0100
@@ -85,10 +85,9 @@ int ioremap_page_range(unsigned long add
err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
if (err)
break;
- set_pgdir(addr, *pgd);
} while (pgd++, addr = next, addr != end);
-
- flush_cache_vmap(start, end);
+ __ipipe_update_all_pinned_mm(start, end);
+ flush_cache_vmap(start, end);
return err;
}
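
The reason for hooking ioremap_page_range() is that pinned tasks may
never fault, so kernel mappings created after the pinning must be
pushed eagerly into every pinned mm instead of being faulted in
lazily. Driver code gets this for free; e.g. (illustrative sketch,
not part of the patch):

	#include <asm/io.h>

	/* ioremap() ends up in ioremap_page_range(), which now replays
	 * the new kernel ptes into all pinned mms, so a head-domain
	 * handler may touch the registers without ever faulting. */
	static void __iomem *example_map_registers(unsigned long phys,
						   unsigned long len)
	{
		return ioremap(phys, len);
	}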
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/mm/memory.c ipipe-2.6.19-arm-nocow/mm/memory.c
--- ipipe-2.6.19-arm/mm/memory.c 2007-01-04 22:05:15.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/mm/memory.c 2007-01-30 23:35:51.960412122 +0100
@@ -50,6 +50,7 @@
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/writeback.h>
+#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -418,13 +419,41 @@ struct page *vm_normal_page(struct vm_ar
return pfn_to_page(pfn);
}
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
+		return;
+	}
+	copy_user_highpage(dst, src, va);
+}
+
+
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
* covered by this vma.
*/
-static inline void
+static inline int
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr, int *rss)
@@ -466,6 +495,25 @@ copy_one_pte(struct mm_struct *dst_mm, s
* in the parent and the child
*/
if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (((vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED))
+		    == (VM_LOCKED|VM_PINNED)) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+			if (!page)
+				return -ENOMEM;
+
+			cow_user_page(page, old_page, addr);
+			pte = mk_pte(page, vma->vm_page_prot);
+
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_dup_rmap(page);
+			rss[!!PageAnon(page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
ptep_set_wrprotect(src_mm, addr, src_pte);
pte = pte_wrprotect(pte);
}
@@ -487,6 +535,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
out_set_pte:
set_pte_at(dst_mm, addr, dst_pte, pte);
+ return 0;
}
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -524,7 +573,9 @@ again:
progress++;
continue;
}
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		if (copy_one_pte(dst_mm, src_mm, dst_pte,
+				 src_pte, vma, addr, rss))
+			return -ENOMEM;
progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
@@ -1431,34 +1482,6 @@ static inline pte_t maybe_mkwrite(pte_t
return pte;
}
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
-{
- /*
- * If the source page was a PFN mapping, we don't have
- * a "struct page" for it. We do a best-effort copy by
- * just copying from the original user address. If that
- * fails, we just zero-fill it. Live with it.
- */
- if (unlikely(!src)) {
- void *kaddr = kmap_atomic(dst, KM_USER0);
- void __user *uaddr = (void __user *)(va & PAGE_MASK);
-
- /*
- * This really shouldn't fail, because the page is there
- * in the page tables. But it might just be unreadable,
- * in which case we just give up and fill the result with
- * zeroes.
- */
- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
- memset(kaddr, 0, PAGE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(dst);
- return;
-
- }
- copy_user_highpage(dst, src, va);
-}
-
/*
* This routine handles present pages, when users try to write
* to a shared page. It is done by copying the page to a new address
@@ -2676,3 +2699,157 @@ int access_process_vm(struct task_struct
return buf - old_buf;
}
+
+#ifdef CONFIG_IPIPE
+static LIST_HEAD(pinned_mms);
+/*
+ * A semaphore, not a spinlock: walking the list implies taking each
+ * mm's mmap_sem, which may sleep.
+ */
+static DECLARE_RWSEM(pinned_mms_sem);
+
+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	do {
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte)
+			continue;
+
+		/* Only present, read-only ptes need a COW break. */
+		if (!pte_present(*pte) || pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			continue;
+		}
+
+		/* do_wp_page() unmaps the pte and drops ptl for us. */
+		if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
+			return -ENOMEM;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		if (ipipe_pin_pte_range(mm, pmd, vma, addr, next))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		if (ipipe_pin_pmd_range(mm, pud, vma, addr, next))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
+{
+	unsigned long addr, next, end;
+	struct vm_area_struct *vma;
+	struct vm_struct *area;
+	struct mm_struct *mm;
+	int result = 0;
+	pgd_t *pgd;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (mm->def_flags & VM_PINNED) {
+		/* Already pinned, nothing to do. */
+		up_write(&mm->mmap_sem);
+		goto done_mm;
+	}
+
+	/* Break COW now for every private mapping of this mm. */
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!is_cow_mapping(vma->vm_flags))
+			continue;
+
+		addr = vma->vm_start;
+		end = vma->vm_end;
+
+		pgd = pgd_offset(mm, addr);
+		do {
+			next = pgd_addr_end(addr, end);
+			if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) {
+				result = -ENOMEM;
+				up_write(&mm->mmap_sem);
+				goto done_mm;
+			}
+		} while (pgd++, addr = next, addr != end);
+	}
+
+	/*
+	 * Replay the current kernel (vmalloc/ioremap) mappings into
+	 * this mm.  vmlist_lock is a spinlock and the replay does not
+	 * sleep, so it must nest inside mmap_sem, not outside of it.
+	 */
+	read_lock(&vmlist_lock);
+	for (area = vmlist; area; area = area->next) {
+		result = __ipipe_pin_range_mapping(mm,
+						   (unsigned long) area->addr,
+						   (unsigned long) area->addr
+						   + area->size);
+		if (result)
+			break;
+	}
+	read_unlock(&vmlist_lock);
+
+	if (result == 0)
+		mm->def_flags |= VM_PINNED;
+	up_write(&mm->mmap_sem);
+
+	if (result == 0) {
+		/*
+		 * Future kernel mappings will be replayed into this mm
+		 * by __ipipe_update_all_pinned_mm().  The list lock is
+		 * taken outside of mmap_sem to keep the locking order
+		 * consistent with that function.
+		 */
+		down_write(&pinned_mms_sem);
+		list_add(&mm->pinned, &pinned_mms);
+		up_write(&pinned_mms_sem);
+	}
+
+  done_mm:
+	mmput(mm);
+	return result;
+}
+
+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
+
+int __ipipe_update_all_pinned_mm(unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm;
+	int result = 0;
+
+	down_read(&pinned_mms_sem);
+	list_for_each_entry(mm, &pinned_mms, pinned) {
+		down_write(&mm->mmap_sem);
+		result = __ipipe_pin_range_mapping(mm, start, end);
+		up_write(&mm->mmap_sem);
+
+		if (result)
+			break;
+	}
+	up_read(&pinned_mms_sem);
+
+	return result;
+}
+
+void __ipipe_unlink_pinned_mm(struct mm_struct *mm)
+{
+	if (mm->def_flags & VM_PINNED) {
+		down_write(&pinned_mms_sem);
+		list_del(&mm->pinned);
+		up_write(&pinned_mms_sem);
+	}
+}
+#endif
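
As a sanity check of the fork path above, a plain user-space program
can verify that a locked, pinned parent keeps writable ptes across
fork() instead of taking COW faults. rt_pin_self() below is a
placeholder for whatever service the real-time core layers over
ipipe_disable_ondemand_mappings(); it is not an interface defined by
this patch:

	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/mman.h>

	/* Placeholder: stands for the (core-specific) service ending up
	 * in ipipe_disable_ondemand_mappings() for the current task. */
	static int rt_pin_self(void) { return 0; }

	int main(void)
	{
		char *buf = malloc(1 << 20);

		memset(buf, 0xa5, 1 << 20);
		mlockall(MCL_CURRENT | MCL_FUTURE);
		rt_pin_self();
		if (fork() == 0)
			_exit(0);	/* the child got eager copies */
		/* with the mm pinned, these writes must not fault */
		memset(buf, 0x5a, 1 << 20);
		return 0;
	}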
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/mm/mlock.c ipipe-2.6.19-arm-nocow/mm/mlock.c
--- ipipe-2.6.19-arm/mm/mlock.c 2006-05-07 16:42:15.000000000 +0200
+++ ipipe-2.6.19-arm-nocow/mm/mlock.c 2007-01-30 21:22:26.772349356 +0100
@@ -166,7 +166,7 @@ static int do_mlockall(int flags)
if (flags & MCL_FUTURE)
def_flags = VM_LOCKED;
- current->mm->def_flags = def_flags;
+ current->mm->def_flags |= def_flags;
if (flags == MCL_FUTURE)
goto out;
diff -Naurdp -x '*~' -x '*.orig' -x '*.rej' ipipe-2.6.19-arm/mm/vmalloc.c ipipe-2.6.19-arm-nocow/mm/vmalloc.c
--- ipipe-2.6.19-arm/mm/vmalloc.c 2007-01-15 21:33:01.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/mm/vmalloc.c 2007-01-30 21:22:26.773349232 +0100
@@ -152,15 +152,12 @@ int map_vm_area(struct vm_struct *area,
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
do {
- pgd_t oldpgd;
- memcpy(&oldpgd,pgd,sizeof(pgd_t));
next = pgd_addr_end(addr, end);
err = vmap_pud_range(pgd, addr, next, prot, pages);
if (err)
break;
- if (pgd_val(oldpgd) != pgd_val(*pgd))
- set_pgdir(addr, *pgd);
} while (pgd++, addr = next, addr != end);
+ __ipipe_update_all_pinned_mm((unsigned long) area->addr, end);
flush_cache_vmap((unsigned long) area->addr, end);
return err;
}
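
map_vm_area() gets the same treatment as ioremap_page_range(): memory
vmalloc()ed after a task was pinned must be immediately visible in
its page tables. A minimal module sketch (illustrative only) relying
on that guarantee:

	#include <linux/module.h>
	#include <linux/vmalloc.h>

	static void *example_buf;

	static int __init example_init(void)
	{
		/* map_vm_area() runs under the hood and replays the new
		 * kernel ptes into every pinned mm, so the buffer is
		 * usable from the real-time domain without a fault. */
		example_buf = vmalloc(64 * 1024);
		return example_buf ? 0 : -ENOMEM;
	}

	static void __exit example_exit(void)
	{
		vfree(example_buf);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");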
--- ipipe-2.6.19/arch/i386/mm/fault.c 2007-01-10 09:44:52.000000000 +0100
+++ ipipe-2.6.19-nocow/arch/i386/mm/fault.c 2007-01-15 09:57:02.000000000 +0100
@@ -654,3 +654,18 @@ void vmalloc_sync_all(void)
}
}
#endif
+
+#ifdef CONFIG_IPIPE
+int __ipipe_pin_range_mapping(struct mm_struct *mm,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long next, addr = start;
+
+	do {
+		next = pgd_addr_end(addr, end);
+		vmalloc_sync_one(mm->pgd, addr);
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif /* CONFIG_IPIPE */
--- ipipe-2.6.19/include/asm-i386/pgalloc.h 2007-01-10 09:44:53.000000000 +0100
+++ ipipe-2.6.19-nocow/include/asm-i386/pgalloc.h 2007-01-11 09:58:49.000000000 +0100
@@ -46,27 +46,4 @@ static inline void pte_free(struct page
#define check_pgt_cache() do { } while (0)
-static inline void set_pgdir(unsigned long address, pgd_t entry)
-{
-#ifdef CONFIG_IPIPE
- struct task_struct * p;
- struct page *page;
- pgd_t *pgd;
-
- read_lock(&tasklist_lock);
-
- for_each_process(p) {
- if(p->mm)
- *pgd_offset(p->mm,address) = entry;
- }
-
- read_unlock(&tasklist_lock);
-
- for (page = pgd_list; page; page = (struct page *)page->index) {
- pgd = (pgd_t *)page_address(page);
- pgd[address >> PGDIR_SHIFT] = entry;
- }
-#endif /* CONFIG_IPIPE */
-}
-
#endif /* _I386_PGALLOC_H */
--- ipipe-2.6.19-arm/arch/arm/mm/fault.c 2007-01-30 21:33:47.000000000 +0100
+++ ipipe-2.6.19-arm-nocow/arch/arm/mm/fault.c 2007-01-30 23:23:05.513766878 +0100
@@ -330,6 +330,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (ipipe_trap_notify(IPIPE_TRAP_ACCESS,regs))
+ return 0;
+
index = pgd_index(addr);
/*
@@ -354,9 +357,6 @@ do_translation_fault(unsigned long addr,
return 0;
bad_area:
- if (ipipe_trap_notify(IPIPE_TRAP_ACCESS,regs))
- return 0;
-
do_bad_area(addr, fsr, regs);
return 0;
}
@@ -479,3 +479,35 @@ do_PrefetchAbort(unsigned long addr, str
do_translation_fault(addr, 0, regs);
}
+#ifdef CONFIG_IPIPE
+static void vmalloc_sync_one(pgd_t *pgd, unsigned long addr)
+{
+	unsigned int index = pgd_index(addr);
+	pgd_t *pgd_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd))
+		set_pgd(pgd, *pgd_k);
+
+	pmd_k = pmd_offset(pgd_k, addr);
+	pmd = pmd_offset(pgd, addr);
+
+	copy_pmd(pmd, pmd_k);
+}
+
+int __ipipe_pin_range_mapping(struct mm_struct *mm,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long next, addr = start;
+
+	do {
+		next = pgd_addr_end(addr, end);
+		vmalloc_sync_one(mm->pgd, addr);
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif /* CONFIG_IPIPE */
--- ipipe-2.6.19-arm/include/asm-arm/pgalloc.h 2007-01-30 23:47:15.711345662 +0100
+++ ipipe-2.6.19-arm-nocow/include/asm-arm/pgalloc.h 2007-01-30 23:43:39.759212585 +0100
@@ -23,11 +23,6 @@
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
#define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL))
-static inline void set_pgdir(unsigned long address, pgd_t entry)
-{
- /* nop */
-}
-
/*
* Since we have only two-level page tables, these are trivial
*/