[PATCH v7 01/12] Add get_user_pages() variant that fails if major fault is required.
From: Gleb Natapov g...@redhat.com This patch adds a get_user_pages() variant that only succeeds if getting a reference to a page doesn't require major fault. Reviewed-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- fs/ncpfs/mmap.c|2 ++ include/linux/mm.h |5 + mm/filemap.c |3 +++ mm/memory.c| 31 --- mm/shmem.c |8 +++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 56f5b3a..b9c4f36 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, int bufsize; int pos; /* XXX: loff_t ? */ + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; /* * ncpfs has nothing against high pages as long * as recvmsg and memset works on it diff --git a/include/linux/mm.h b/include/linux/mm.h index 74949fb..da32900 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -144,6 +144,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01/* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02/* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04/* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_MINOR 0x08/* Do only minor fault */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -848,6 +849,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); @@ -1394,6 +1398,7 @@ struct page *follow_page(struct vm_area_struct 
*, unsigned long address, #define FOLL_GET 0x04/* do get_page on page */ #define FOLL_DUMP 0x08/* give error on hole if it would be zero */ #define FOLL_FORCE 0x10/* get_user_pages read/write w/o permission */ +#define FOLL_MINOR 0x20/* do only minor page faults */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 3d4df44..ef28b6d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto no_cached_page; } } else { + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; + /* No page in the page cache at all */ do_sync_mmap_readahead(vma, ra, file, offset); count_vm_event(PGMAJFAULT); diff --git a/mm/memory.c b/mm/memory.c index 0e18b4d..b221458 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1441,10 +1441,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; + unsigned int fault_fl = + ((foll_flags & FOLL_WRITE) ? + FAULT_FLAG_WRITE : 0) | + ((foll_flags & FOLL_MINOR) ? + FAULT_FLAG_MINOR : 0); - ret = handle_mm_fault(mm, vma, start, - (foll_flags & FOLL_WRITE) ? - FAULT_FLAG_WRITE : 0); + ret = handle_mm_fault(mm, vma, start, fault_fl); if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) @@ -1452,6 +1455,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; + else if (ret & VM_FAULT_MAJOR) + return i ? i : -EFAULT; BUG(); } if (ret & VM_FAULT_MAJOR) @@ -1562,6 +1567,23 @@ int get_user_pages(struct
[PATCH v7 01/12] Add get_user_pages() variant that fails if major fault is required.
This patch adds a get_user_pages() variant that only succeeds if getting a reference to a page doesn't require major fault. Reviewed-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- fs/ncpfs/mmap.c|2 ++ include/linux/mm.h |5 + mm/filemap.c |3 +++ mm/memory.c| 31 --- mm/shmem.c |8 +++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 56f5b3a..b9c4f36 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, int bufsize; int pos; /* XXX: loff_t ? */ + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; /* * ncpfs has nothing against high pages as long * as recvmsg and memset works on it diff --git a/include/linux/mm.h b/include/linux/mm.h index 74949fb..da32900 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -144,6 +144,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01/* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02/* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04/* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_MINOR 0x08/* Do only minor fault */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -848,6 +849,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); @@ -1394,6 +1398,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define 
FOLL_GET 0x04/* do get_page on page */ #define FOLL_DUMP 0x08/* give error on hole if it would be zero */ #define FOLL_FORCE 0x10/* get_user_pages read/write w/o permission */ +#define FOLL_MINOR 0x20/* do only minor page faults */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 3d4df44..ef28b6d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto no_cached_page; } } else { + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; + /* No page in the page cache at all */ do_sync_mmap_readahead(vma, ra, file, offset); count_vm_event(PGMAJFAULT); diff --git a/mm/memory.c b/mm/memory.c index 0e18b4d..b221458 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1441,10 +1441,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; + unsigned int fault_fl = + ((foll_flags & FOLL_WRITE) ? + FAULT_FLAG_WRITE : 0) | + ((foll_flags & FOLL_MINOR) ? + FAULT_FLAG_MINOR : 0); - ret = handle_mm_fault(mm, vma, start, - (foll_flags & FOLL_WRITE) ? - FAULT_FLAG_WRITE : 0); + ret = handle_mm_fault(mm, vma, start, fault_fl); if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) @@ -1452,6 +1455,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; + else if (ret & VM_FAULT_MAJOR) + return i ? i : -EFAULT; BUG(); } if (ret & VM_FAULT_MAJOR) @@ -1562,6 +1567,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct