[PATCH v7 01/12] Add get_user_pages() variant that fails if major fault is required.

2010-10-14 Thread Gleb Natapov
From: Gleb Natapov <g...@redhat.com>

This patch adds a get_user_pages() variant that only succeeds if getting
a reference to a page does not require a major fault.
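
A caller that must not block on disk I/O would typically try the new
helper first and fall back to a slower path when it cannot pin the page.
The snippet below is only an illustrative sketch, not part of this patch:
the caller and its fallback helper schedule_async_fault() are hypothetical;
only the get_user_pages_noio() signature and return convention are taken
from the patch.

#include <linux/mm.h>
#include <linux/sched.h>

/*
 * Illustrative sketch: try to pin one user page without doing I/O.
 * get_user_pages_noio() returns the number of pages pinned (at most 1
 * here) and fails instead of waiting for a major fault.
 */
static int pin_page_or_defer(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long addr, struct page **page)
{
	int npages;

	down_read(&mm->mmap_sem);
	npages = get_user_pages_noio(tsk, mm, addr, 1, 1, 0, page, NULL);
	up_read(&mm->mmap_sem);

	if (npages == 1)
		return 0;	/* page was resident; reference taken */

	/* hypothetical slow path: queue work that is allowed to sleep */
	return schedule_async_fault(mm, addr);
}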

Reviewed-by: Rik van Riel <r...@redhat.com>
Signed-off-by: Gleb Natapov <g...@redhat.com>
---
 fs/ncpfs/mmap.c    |    2 ++
 include/linux/mm.h |    5 +++++
 mm/filemap.c       |    3 +++
 mm/memory.c        |   31 ++++++++++++++++++++++++++++++++---
 mm/shmem.c         |    8 +++++++-
 5 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 56f5b3a..b9c4f36 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
 	int bufsize;
 	int pos; /* XXX: loff_t ? */
 
+	if (vmf->flags & FAULT_FLAG_MINOR)
+		return VM_FAULT_MAJOR | VM_FAULT_ERROR;
 	/*
 	 * ncpfs has nothing against high pages as long
 	 * as recvmsg and memset works on it
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 74949fb..da32900 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -144,6 +144,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_MINOR	0x08	/* Do only minor fault */
 
 /*
  * This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -848,6 +849,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int nr_pages, int write, int force,
struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm,
+   unsigned long start, int nr_pages, int write, int force,
+   struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
 struct page *get_dump_page(unsigned long addr);
@@ -1394,6 +1398,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
+#define FOLL_MINOR	0x20	/* do only minor page faults */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/mm/filemap.c b/mm/filemap.c
index 3d4df44..ef28b6d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			goto no_cached_page;
 		}
 	} else {
+		if (vmf->flags & FAULT_FLAG_MINOR)
+			return VM_FAULT_MAJOR | VM_FAULT_ERROR;
+
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
diff --git a/mm/memory.c b/mm/memory.c
index 0e18b4d..b221458 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1441,10 +1441,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		cond_resched();
 		while (!(page = follow_page(vma, start, foll_flags))) {
 			int ret;
+			unsigned int fault_fl =
+				((foll_flags & FOLL_WRITE) ?
+					FAULT_FLAG_WRITE : 0) |
+				((foll_flags & FOLL_MINOR) ?
+					FAULT_FLAG_MINOR : 0);
 
-			ret = handle_mm_fault(mm, vma, start,
-					(foll_flags & FOLL_WRITE) ?
-					FAULT_FLAG_WRITE : 0);
+			ret = handle_mm_fault(mm, vma, start, fault_fl);
 
 			if (ret & VM_FAULT_ERROR) {
 				if (ret & VM_FAULT_OOM)
@@ -1452,6 +1455,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				if (ret &
 				    (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
 					return i ? i : -EFAULT;
+				else if (ret & VM_FAULT_MAJOR)
+					return i ? i : -EFAULT;
 				BUG();
 			}
 			if (ret & VM_FAULT_MAJOR)
@@ -1562,6 +1567,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
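
The body of get_user_pages_noio() itself is not shown in the hunks quoted
above.  Purely as a rough sketch, and assuming it sits next to
__get_user_pages() in mm/memory.c, such a wrapper would presumably mirror
get_user_pages(): translate write/force into FOLL_* flags, add FOLL_MINOR,
and hand off to __get_user_pages(), along these lines:

/*
 * Rough sketch, not the patch's actual hunk: behave like
 * get_user_pages() but request only minor faults via FOLL_MINOR.
 */
int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int nr_pages, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
	int flags = FOLL_TOUCH | FOLL_MINOR;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages(tsk, mm, start, nr_pages, flags,
				pages, vmas);
}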
