Jan Kiszka wrote: > Philippe Gerum wrote: >> On Tue, 2007-08-07 at 17:09 +0200, Gilles Chanteperdrix wrote: >>> On 8/7/07, Jan Kiszka <[EMAIL PROTECTED]> wrote: >>>> Gilles Chanteperdrix wrote: >>>>> The fact that you are in a hurry should not be an excuse to propose a >>>>> fix which is much worse than the bug itself. >>>> Please explain. >>> Using GFP_ATOMIC is not a good idea, because the kernel needs the >>> GFP_ATOMIC pools for allocation that take place from really atomic >>> contexts such as interruption handlers for instance. fork should not >>> be an atomic context. >>> >>> So the only reason I see why you propose this patch is because you are >>> in a hurry. Ok, before we fix this properly, you can use GFP_ATOMIC >>> locally, but please do not make >>> this an official patch. >>> >> I'm afraid you are both right. Draining pages from the atomic pool might >> starve truely atomic contexts, and releasing the lock guarding the >> pagetable pages for the source mm seems contradictory with what the >> current code tries to achieve in holding it. >> >> Here is a patch preallocating the page for the cow-breaking code, which >> relies on the lock breaking pattern already in place. Not exactly >> pretty, slightly tested here (UP, spinlock debug), but looks ok so far. >> Needs SMP testing before any attempt is made to merge it. > > Funny. I just finished my own patch of this kind and made it pass a > basic COW stress test here. Find it below, it is very similar. Will > now analyse yours to see which one looks nicer and/or is more correct.
Yours is clearly nicer, but it required/allowed a few more cleanups. Find my variant attached (against our local 2.6.20 kernel). From my POV: problem solved. 8) Jan
---
mm/memory.c | 55 ++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 38 insertions(+), 17 deletions(-)
Index: linux-2.6.20.15/mm/memory.c
===================================================================
--- linux-2.6.20.15.orig/mm/memory.c
+++ linux-2.6.20.15/mm/memory.c
@@ -453,10 +453,10 @@ static inline void cow_user_page(struct
* covered by this vma.
*/
-static inline int
+static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
- unsigned long addr, int *rss)
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+ unsigned long addr, int *rss, struct page *uncow_page)
{
unsigned long vm_flags = vma->vm_flags;
pte_t pte = *src_pte;
@@ -496,21 +496,17 @@ copy_one_pte(struct mm_struct *dst_mm, s
*/
if (is_cow_mapping(vm_flags)) {
#ifdef CONFIG_IPIPE
- if (((vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED)) ==
(VM_LOCKED|VM_PINNED)) {
+ if (uncow_page) {
struct page *old_page = vm_normal_page(vma, addr, pte);
- page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
- if (!page)
- return -ENOMEM;
+ cow_user_page(uncow_page, old_page, addr, vma);
+ pte = mk_pte(uncow_page, vma->vm_page_prot);
- cow_user_page(page, old_page, addr, vma);
- pte = mk_pte(page, vma->vm_page_prot);
-
if (vm_flags & VM_SHARED)
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
- page_dup_rmap(page);
- rss[!!PageAnon(page)]++;
+ page_dup_rmap(uncow_page);
+ rss[!!PageAnon(uncow_page)]++;
goto out_set_pte;
}
#endif /* CONFIG_IPIPE */
@@ -535,7 +531,6 @@ copy_one_pte(struct mm_struct *dst_mm, s
out_set_pte:
set_pte_at(dst_mm, addr, dst_pte, pte);
- return 0;
}
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -546,12 +541,27 @@ static int copy_pte_range(struct mm_stru
spinlock_t *src_ptl, *dst_ptl;
int progress = 0;
int rss[2];
+#ifdef CONFIG_IPIPE
+ int do_cow_break = 0;
+#endif
+ struct page *uncow_page = NULL;
again:
+#ifdef CONFIG_IPIPE
+ if (do_cow_break) {
+ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ if (!uncow_page)
+ return -ENOMEM;
+ do_cow_break = 0;
+ }
+#endif
rss[1] = rss[0] = 0;
dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
- if (!dst_pte)
+ if (!dst_pte) {
+ if (uncow_page)
+ page_cache_release(uncow_page);
return -ENOMEM;
+ }
src_pte = pte_offset_map_nested(src_pmd, addr);
src_ptl = pte_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -573,9 +583,20 @@ again:
progress++;
continue;
}
- if (copy_one_pte(dst_mm, src_mm, dst_pte,
- src_pte, vma, addr, rss))
- return -ENOMEM;
+#ifdef CONFIG_IPIPE
+ if (likely(uncow_page == NULL) &&
likely(pte_present(*src_pte))) {
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (((vma->vm_flags|src_mm->def_flags) &
(VM_LOCKED|VM_PINNED))
+ == (VM_LOCKED|VM_PINNED)) {
+ do_cow_break = 1;
+ break;
+ }
+ }
+ }
+#endif
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+ vma, addr, rss, uncow_page);
+ uncow_page = NULL;
progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
signature.asc
Description: OpenPGP digital signature
_______________________________________________ Adeos-main mailing list [email protected] https://mail.gna.org/listinfo/adeos-main
