Re: [PATCH] general split_vma hugetlb fix

2005-02-11 Thread William Lee Irwin III
On Fri, Feb 11, 2005 at 08:06:08PM +, Hugh Dickins wrote:
> My recent do_munmap hugetlb fix has proved inadequate.  There are
> other places (madvise, mbind, mlock, mprotect) where split_vma is
> called.  Only mprotect excludes a hugetlb vma: the others are in
> danger of splitting at a misaligned address, causing later BUGs.
> So move the ~HPAGE_MASK check from do_munmap to split_vma itself;
> and fix up those places (madvise and mlock) which expect split_vma
> can fail only with -ENOMEM, and wish to convert that to -EAGAIN.
> (It appears genuine that some of these syscalls should be failing
> with -ENOMEM and some with -EAGAIN, so respect those behaviours.)
> madvise_dontneed doesn't use split_vma, but is equally in danger
> of causing a hugetlb BUG via zap_page_range.  Whereas elsewhere the
> patch is permissive (allowing the operation on a hugetlb vma even when
> pointless, so long as it doesn't missplit it), here we must use -EINVAL
> on any hugetlb vma, since a page fault would hit the BUG in its nopage.
> Signed-off-by: Hugh Dickins <[EMAIL PROTECTED]>

As usual, excellent work, Hugh. akpm, Linus, please apply.

Acked-by: William Irwin <[EMAIL PROTECTED]>


-- wli
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] general split_vma hugetlb fix

2005-02-11 Thread Hugh Dickins
My recent do_munmap hugetlb fix has proved inadequate.  There are
other places (madvise, mbind, mlock, mprotect) where split_vma is
called.  Only mprotect excludes a hugetlb vma: the others are in
danger of splitting at a misaligned address, causing later BUGs.

So move the ~HPAGE_MASK check from do_munmap to split_vma itself;
and fix up those places (madvise and mlock) which expect split_vma
can fail only with -ENOMEM, and wish to convert that to -EAGAIN.
(It appears genuine that some of these syscalls should be failing
with -ENOMEM and some with -EAGAIN, so respect those behaviours.)

madvise_dontneed doesn't use split_vma, but is equally in danger
of causing a hugetlb BUG via zap_page_range.  Whereas elsewhere the
patch is permissive (allowing the operation on a hugetlb vma even when
pointless, so long as it doesn't missplit it), here we must use -EINVAL
on any hugetlb vma, since a page fault would hit the BUG in its nopage.

Signed-off-by: Hugh Dickins <[EMAIL PROTECTED]>

--- 2.6.11-rc3-bk8/mm/madvise.c 2004-12-24 21:36:31.0 +
+++ linux/mm/madvise.c  2005-02-11 19:07:35.0 +
@@ -8,7 +8,7 @@
 #include 
 #include 
 #include 
-
+#include 
 
 /*
  * We can potentially split a vm area into separate
@@ -18,18 +18,18 @@ static long madvise_behavior(struct vm_a
 unsigned long end, int behavior)
 {
struct mm_struct * mm = vma->vm_mm;
-   int error;
+   int error = 0;
 
if (start != vma->vm_start) {
error = split_vma(mm, vma, start, 1);
if (error)
-   return -EAGAIN;
+   goto out;
}
 
if (end != vma->vm_end) {
error = split_vma(mm, vma, end, 0);
if (error)
-   return -EAGAIN;
+   goto out;
}
 
/*
@@ -48,7 +48,10 @@ static long madvise_behavior(struct vm_a
break;
}
 
-   return 0;
+out:
+   if (error == -ENOMEM)
+   error = -EAGAIN;
+   return error;
 }
 
 /*
@@ -94,7 +97,7 @@ static long madvise_willneed(struct vm_a
 static long madvise_dontneed(struct vm_area_struct * vma,
 unsigned long start, unsigned long end)
 {
-   if (vma->vm_flags & VM_LOCKED)
+   if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
return -EINVAL;
 
if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
--- 2.6.11-rc3-bk8/mm/mlock.c   2005-02-03 09:06:16.0 +
+++ linux/mm/mlock.c2005-02-11 19:07:35.0 +
@@ -21,17 +21,15 @@ static int mlock_fixup(struct vm_area_st
goto out;
 
if (start != vma->vm_start) {
-   if (split_vma(mm, vma, start, 1)) {
-   ret = -EAGAIN;
+   ret = split_vma(mm, vma, start, 1);
+   if (ret)
goto out;
-   }
}
 
if (end != vma->vm_end) {
-   if (split_vma(mm, vma, end, 0)) {
-   ret = -EAGAIN;
+   ret = split_vma(mm, vma, end, 0);
+   if (ret)
goto out;
-   }
}
 
/*
@@ -53,6 +51,8 @@ static int mlock_fixup(struct vm_area_st
 
vma->vm_mm->locked_vm -= pages;
 out:
+   if (ret == -ENOMEM)
+   ret = -EAGAIN;
return ret;
 }
 
--- 2.6.11-rc3-bk8/mm/mmap.c2005-02-11 15:49:13.0 +
+++ linux/mm/mmap.c 2005-02-11 19:07:35.0 +
@@ -1747,6 +1747,9 @@ int split_vma(struct mm_struct * mm, str
struct mempolicy *pol;
struct vm_area_struct *new;
 
+   if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+   return -EINVAL;
+
if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
 
@@ -1821,20 +1824,18 @@ int do_munmap(struct mm_struct *mm, unsi
 * places tmp vma above, and higher split_vma places tmp vma below.
 */
if (start > mpnt->vm_start) {
-   if (is_vm_hugetlb_page(mpnt) && (start & ~HPAGE_MASK))
-   return -EINVAL;
-   if (split_vma(mm, mpnt, start, 0))
-   return -ENOMEM;
+   int error = split_vma(mm, mpnt, start, 0);
+   if (error)
+   return error;
prev = mpnt;
}
 
/* Does it split the last one? */
last = find_vma(mm, end);
if (last && end > last->vm_start) {
-   if (is_vm_hugetlb_page(last) && (end & ~HPAGE_MASK))
-   return -EINVAL;
-   if (split_vma(mm, last, end, 1))
-   return -ENOMEM;
+   int error = split_vma(mm, last, end, 1);
+   if (error)
+   return error;
}
mpnt = prev? prev->vm_next: mm->mmap;
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the 

[PATCH] general split_vma hugetlb fix

2005-02-11 Thread Hugh Dickins
My recent do_munmap hugetlb fix has proved inadequate.  There are
other places (madvise, mbind, mlock, mprotect) where split_vma is
called.  Only mprotect excludes a hugetlb vma: the others are in
danger of splitting at a misaligned address, causing later BUGs.

So move the ~HPAGE_MASK check from do_munmap to split_vma itself;
and fix up those places (madvise and mlock) which expect split_vma
can fail only with -ENOMEM, and wish to convert that to -EAGAIN.
(It appears genuine that some of these syscalls should be failing
with -ENOMEM and some with -EAGAIN, so respect those behaviours.)

madvise_dontneed doesn't use split_vma, but is equally in danger
of causing a hugetlb BUG via zap_page_range.  Whereas elsewhere the
patch is permissive (allowing the operation on a hugetlb vma even when
pointless, so long as it doesn't missplit it), here we must use -EINVAL
on any hugetlb vma, since a page fault would hit the BUG in its nopage.

Signed-off-by: Hugh Dickins [EMAIL PROTECTED]

--- 2.6.11-rc3-bk8/mm/madvise.c 2004-12-24 21:36:31.0 +
+++ linux/mm/madvise.c  2005-02-11 19:07:35.0 +
@@ -8,7 +8,7 @@
 #include linux/mman.h
 #include linux/pagemap.h
 #include linux/syscalls.h
-
+#include linux/hugetlb.h
 
 /*
  * We can potentially split a vm area into separate
@@ -18,18 +18,18 @@ static long madvise_behavior(struct vm_a
 unsigned long end, int behavior)
 {
struct mm_struct * mm = vma-vm_mm;
-   int error;
+   int error = 0;
 
if (start != vma-vm_start) {
error = split_vma(mm, vma, start, 1);
if (error)
-   return -EAGAIN;
+   goto out;
}
 
if (end != vma-vm_end) {
error = split_vma(mm, vma, end, 0);
if (error)
-   return -EAGAIN;
+   goto out;
}
 
/*
@@ -48,7 +48,10 @@ static long madvise_behavior(struct vm_a
break;
}
 
-   return 0;
+out:
+   if (error == -ENOMEM)
+   error = -EAGAIN;
+   return error;
 }
 
 /*
@@ -94,7 +97,7 @@ static long madvise_willneed(struct vm_a
 static long madvise_dontneed(struct vm_area_struct * vma,
 unsigned long start, unsigned long end)
 {
-   if (vma-vm_flags  VM_LOCKED)
+   if ((vma-vm_flags  VM_LOCKED) || is_vm_hugetlb_page(vma))
return -EINVAL;
 
if (unlikely(vma-vm_flags  VM_NONLINEAR)) {
--- 2.6.11-rc3-bk8/mm/mlock.c   2005-02-03 09:06:16.0 +
+++ linux/mm/mlock.c2005-02-11 19:07:35.0 +
@@ -21,17 +21,15 @@ static int mlock_fixup(struct vm_area_st
goto out;
 
if (start != vma-vm_start) {
-   if (split_vma(mm, vma, start, 1)) {
-   ret = -EAGAIN;
+   ret = split_vma(mm, vma, start, 1);
+   if (ret)
goto out;
-   }
}
 
if (end != vma-vm_end) {
-   if (split_vma(mm, vma, end, 0)) {
-   ret = -EAGAIN;
+   ret = split_vma(mm, vma, end, 0);
+   if (ret)
goto out;
-   }
}
 
/*
@@ -53,6 +51,8 @@ static int mlock_fixup(struct vm_area_st
 
vma-vm_mm-locked_vm -= pages;
 out:
+   if (ret == -ENOMEM)
+   ret = -EAGAIN;
return ret;
 }
 
--- 2.6.11-rc3-bk8/mm/mmap.c2005-02-11 15:49:13.0 +
+++ linux/mm/mmap.c 2005-02-11 19:07:35.0 +
@@ -1747,6 +1747,9 @@ int split_vma(struct mm_struct * mm, str
struct mempolicy *pol;
struct vm_area_struct *new;
 
+   if (is_vm_hugetlb_page(vma)  (addr  ~HPAGE_MASK))
+   return -EINVAL;
+
if (mm-map_count = sysctl_max_map_count)
return -ENOMEM;
 
@@ -1821,20 +1824,18 @@ int do_munmap(struct mm_struct *mm, unsi
 * places tmp vma above, and higher split_vma places tmp vma below.
 */
if (start  mpnt-vm_start) {
-   if (is_vm_hugetlb_page(mpnt)  (start  ~HPAGE_MASK))
-   return -EINVAL;
-   if (split_vma(mm, mpnt, start, 0))
-   return -ENOMEM;
+   int error = split_vma(mm, mpnt, start, 0);
+   if (error)
+   return error;
prev = mpnt;
}
 
/* Does it split the last one? */
last = find_vma(mm, end);
if (last  end  last-vm_start) {
-   if (is_vm_hugetlb_page(last)  (end  ~HPAGE_MASK))
-   return -EINVAL;
-   if (split_vma(mm, last, end, 1))
-   return -ENOMEM;
+   int error = split_vma(mm, last, end, 1);
+   if (error)
+   return error;
}
mpnt = prev? prev-vm_next: mm-mmap;
 
-
To unsubscribe from this list: send the line unsubscribe 

Re: [PATCH] general split_vma hugetlb fix

2005-02-11 Thread William Lee Irwin III
On Fri, Feb 11, 2005 at 08:06:08PM +, Hugh Dickins wrote:
 My recent do_munmap hugetlb fix has proved inadequate.  There are
 other places (madvise, mbind, mlock, mprotect) where split_vma is
 called.  Only mprotect excludes a hugetlb vma: the others are in
 danger of splitting at a misaligned address, causing later BUGs.
 So move the ~HPAGE_MASK check from do_munmap to split_vma itself;
 and fix up those places (madvise and mlock) which expect split_vma
 can fail only with -ENOMEM, and wish to convert that to -EAGAIN.
 (It appears genuine that some of these syscalls should be failing
 with -ENOMEM and some with -EAGAIN, so respect those behaviours.)
 madvise_dontneed doesn't use split_vma, but is equally in danger
 of causing a hugetlb BUG via zap_page_range.  Whereas elsewhere the
 patch is permissive (allowing the operation on a hugetlb vma even when
 pointless, so long as it doesn't missplit it), here we must use -EINVAL
 on any hugetlb vma, since a page fault would hit the BUG in its nopage.
 Signed-off-by: Hugh Dickins [EMAIL PROTECTED]

As usual, excellent work, Hugh. akpm, Linus, please apply.

Acked-by: William Irwin [EMAIL PROTECTED]


-- wli
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/