Jan Kiszka wrote:
> Philippe Gerum wrote:
>> On Tue, 2007-08-07 at 17:09 +0200, Gilles Chanteperdrix wrote:
>>> On 8/7/07, Jan Kiszka <[EMAIL PROTECTED]> wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> The fact that you are in a hurry should not be an excuse to propose a
>>>>> fix which is much worse than the bug itself.
>>>> Please explain.
>>> Using GFP_ATOMIC is not a good idea, because the kernel needs the
>>> GFP_ATOMIC pools for allocations that take place in really atomic
>>> contexts, such as interrupt handlers. fork should not be an atomic
>>> context.
>>>
>>> So the only reason I see for you to propose this patch is that you
>>> are in a hurry. Ok, before we fix this properly, you can use
>>> GFP_ATOMIC locally, but please do not make this an official patch.
>>>
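(Aside, not part of the quoted mail: a minimal sketch of the distinction being drawn here. grab_page() is a made-up helper; only the two GFP masks matter. GFP_ATOMIC never sleeps and may dip into the small emergency reserves meant for interrupt handlers and other truly atomic callers, while a sleeping allocation such as GFP_HIGHUSER is the appropriate choice from process context like fork(), provided no spinlock is held.)

#include <linux/gfp.h>
#include <linux/mm.h>

/* Illustrative only, not part of either patch. */
static struct page *grab_page(struct vm_area_struct *vma,
                              unsigned long addr, int atomic_context)
{
        if (atomic_context)
                /* Never sleeps; draws on the small emergency pools
                 * reserved for interrupt handlers and similar callers. */
                return alloc_page(GFP_ATOMIC);

        /* May sleep and reclaim memory; fine from process context
         * such as fork(), as long as no spinlock is held. */
        return alloc_page_vma(GFP_HIGHUSER, vma, addr);
}
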
>> I'm afraid you are both right. Draining pages from the atomic pool might
>> starve truly atomic contexts, and releasing the lock guarding the
>> pagetable pages for the source mm seems to contradict what the
>> current code tries to achieve by holding it.
>>
>> Here is a patch preallocating the page for the cow-breaking code, which
>> relies on the lock breaking pattern already in place. Not exactly
>> pretty, slightly tested here (UP, spinlock debug), but looks ok so far.
>> Needs SMP testing before any attempt is made to merge it.
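(A minimal sketch of the lock-breaking pattern referred to above; cow_break_sketch() and its control flow are illustrative only, not the actual mm/memory.c code, though alloc_page_vma() and GFP_HIGHUSER are the calls both patches use. The point is that the decision that a private copy is needed is taken under the pte lock, the lock is then dropped, the page is allocated with a sleeping GFP mask, and the walk restarts at again:.)

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/spinlock.h>

/* Illustrative only: preallocate with no lock held, consume under the lock. */
static int cow_break_sketch(struct vm_area_struct *vma, unsigned long addr,
                            spinlock_t *ptl)
{
        struct page *prealloc = NULL;
        int need_page = 0;

again:
        if (need_page) {
                /* No spinlock held here, so a sleeping allocation is fine. */
                prealloc = alloc_page_vma(GFP_HIGHUSER, vma, addr);
                if (!prealloc)
                        return -ENOMEM;
                need_page = 0;
        }

        spin_lock(ptl);
        if (!prealloc) {
                /* Only under the lock do we learn that a private copy is
                 * needed; remember that, drop the lock and retry. */
                need_page = 1;
                spin_unlock(ptl);
                goto again;
        }
        /* ... install prealloc to break COW sharing for this pte ... */
        prealloc = NULL;
        spin_unlock(ptl);

        return 0;
}
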
> 
> Funny. I just finished my own patch of this kind and made it pass a
> basic COW stress test here. Find it below; it is very similar. Will
> now analyse yours to see which one looks nicer and/or is more correct.

Yours is clearly nicer, but it required/allowed a few more cleanups.
Find my variant attached (against our local 2.6.20 kernel).

From my POV: problem solved. 8)

Jan
---
 mm/memory.c |   55 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 17 deletions(-)

Index: linux-2.6.20.15/mm/memory.c
===================================================================
--- linux-2.6.20.15.orig/mm/memory.c
+++ linux-2.6.20.15/mm/memory.c
@@ -453,10 +453,10 @@ static inline void cow_user_page(struct 
  * covered by this vma.
  */
 
-static inline int
+static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-               unsigned long addr, int *rss)
+            pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+            unsigned long addr, int *rss, struct page *uncow_page)
 {
        unsigned long vm_flags = vma->vm_flags;
        pte_t pte = *src_pte;
@@ -496,21 +496,17 @@ copy_one_pte(struct mm_struct *dst_mm, s
         */
        if (is_cow_mapping(vm_flags)) {
 #ifdef CONFIG_IPIPE
-               if (((vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED)) == (VM_LOCKED|VM_PINNED)) {
+               if (uncow_page) {
                        struct page *old_page = vm_normal_page(vma, addr, pte);
-                       page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
-                       if (!page)
-                               return -ENOMEM;
+                       cow_user_page(uncow_page, old_page, addr, vma);
+                       pte = mk_pte(uncow_page, vma->vm_page_prot);
 
-                       cow_user_page(page, old_page, addr, vma);
-                       pte = mk_pte(page, vma->vm_page_prot);
-                       
                        if (vm_flags & VM_SHARED)
                                pte = pte_mkclean(pte);
                        pte = pte_mkold(pte);
 
-                       page_dup_rmap(page);
-                       rss[!!PageAnon(page)]++;
+                       page_dup_rmap(uncow_page);
+                       rss[!!PageAnon(uncow_page)]++;
                        goto out_set_pte;
                }
 #endif /* CONFIG_IPIPE */
@@ -535,7 +531,6 @@ copy_one_pte(struct mm_struct *dst_mm, s
 
 out_set_pte:
        set_pte_at(dst_mm, addr, dst_pte, pte);
-       return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -546,12 +541,27 @@ static int copy_pte_range(struct mm_stru
        spinlock_t *src_ptl, *dst_ptl;
        int progress = 0;
        int rss[2];
+#ifdef CONFIG_IPIPE
+       int do_cow_break = 0;
+#endif
+       struct page *uncow_page = NULL;
 
 again:
+#ifdef CONFIG_IPIPE
+       if (do_cow_break) {
+               uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+               if (!uncow_page)
+                       return -ENOMEM;
+               do_cow_break = 0;
+       }
+#endif
        rss[1] = rss[0] = 0;
        dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-       if (!dst_pte)
+       if (!dst_pte) {
+               if (uncow_page)
+                       page_cache_release(uncow_page);
                return -ENOMEM;
+       }
        src_pte = pte_offset_map_nested(src_pmd, addr);
        src_ptl = pte_lockptr(src_mm, src_pmd);
        spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -573,9 +583,20 @@ again:
                        progress++;
                        continue;
                }
-               if (copy_one_pte(dst_mm, src_mm, dst_pte,
-                                src_pte, vma, addr, rss))
-                       return -ENOMEM;
+#ifdef CONFIG_IPIPE
+               if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+                       if (is_cow_mapping(vma->vm_flags)) {
+                               if (((vma->vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED))
+                                   == (VM_LOCKED|VM_PINNED)) {
+                                       do_cow_break = 1;
+                                       break;
+                               }
+                       }
+               }
+#endif
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+                            vma, addr, rss, uncow_page);
+               uncow_page = NULL;
                progress += 8;
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
