Denial of service in OOM killer:
http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2010-4243
>From Oleg Nesterov <[email protected]>
Subject: [PATCH 2.6.35] exec: make argv/envp memory visible to oom-killer

From: Oleg Nesterov <[email protected]>

commit 3c77f845722158206a7209c45ccddc264d19319c upstream.

Brad Spengler published a local memory-allocation DoS that
evades the OOM-killer (though not the virtual memory RLIMIT):
http://www.grsecurity.net/~spender/64bit_dos.c

execve()->copy_strings() can allocate a lot of memory, but
this is not visible to oom-killer, nobody can see the nascent
bprm->mm and take it into account.

With this patch get_arg_page() increments current's MM_ANONPAGES
counter every time we allocate the new page for argv/envp. When
do_execve() succeds or fails, we change this counter back.

Technically this is not 100% correct, we can't know if the new
page is swapped out and turn MM_ANONPAGES into MM_SWAPENTS, but
I don't think this really matters and everything becomes correct
once exec changes ->mm or fails.

Compared to upstream:

	before 2.6.36 kernel, oom-killer's badness() takes
	mm->total_vm into account and nothing else. So
	acct_arg_size() has to play with this counter too.

Reported-by: Brad Spengler <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Reviewed-and-discussed-by: KOSAKI Motohiro <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---

 fs/exec.c               |   28 ++++++++++++++++++++++++++--
 include/linux/binfmts.h |    1 +
 2 files changed, 27 insertions(+), 2 deletions(-)

Index: linux/include/linux/binfmts.h
===================================================================
--- linux.orig/include/linux/binfmts.h
+++ linux/include/linux/binfmts.h
@@ -29,6 +29,7 @@ struct linux_binprm{
 	char buf[BINPRM_BUF_SIZE];
 #ifdef CONFIG_MMU
 	struct vm_area_struct *vma;
+	unsigned long vma_pages;
 #else
 # define MAX_ARG_PAGES	32
 	struct page *page[MAX_ARG_PAGES];
Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c
+++ linux/fs/exec.c
@@ -158,6 +158,21 @@ out:
 
 #ifdef CONFIG_MMU
 
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+	struct mm_struct *mm = current->mm;
+	long diff = (long)(pages - bprm->vma_pages);
+
+	if (!mm || !diff)
+		return;
+
+	bprm->vma_pages = pages;
+
+	down_write(&mm->mmap_sem);
+	mm->total_vm += diff;
+	up_write(&mm->mmap_sem);
+}
+
 static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
@@ -180,6 +195,8 @@ static struct page *get_arg_page(struct
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
 		struct rlimit *rlim;
 
+		acct_arg_size(bprm, size / PAGE_SIZE);
+
 		/*
 		 * We've historically supported up to 32 pages (ARG_MAX)
 		 * of argument strings even with small stacks
@@ -270,6 +287,10 @@ static bool valid_arg_len(struct linux_b
 
 #else
 
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+}
+
 static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
@@ -991,6 +1012,7 @@ int flush_old_exec(struct linux_binprm *
 	/*
 	 * Release all of the old mmap stuff
 	 */
+	acct_arg_size(bprm, 0);
 	retval = exec_mmap(bprm->mm);
 	if (retval)
 		goto out;
@@ -1415,8 +1437,10 @@ int do_execve(char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
-		mmput (bprm->mm);
+	if (bprm->mm) {
+		acct_arg_size(bprm, 0);
+		mmput(bprm->mm);
+	}
 
 out_file:
 	if (bprm->file) {
>From 114279be2120a916e8a04feeb2ac976a10016f2f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <[email protected]>
Date: Tue, 30 Nov 2010 20:56:02 +0100
Subject: exec: copy-and-paste the fixes into compat_do_execve() paths

From: Oleg Nesterov <[email protected]>

commit 114279be2120a916e8a04feeb2ac976a10016f2f upstream.

Note: this patch targets 2.6.37 and tries to be as simple as possible.
That is why it adds more copy-and-paste horror into fs/compat.c and
uglifies fs/exec.c, this will be cleanuped later.

compat_copy_strings() plays with bprm->vma/mm directly and thus has
two problems: it lacks the RLIMIT_STACK check and argv/envp memory
is not visible to oom killer.

Export acct_arg_size() and get_arg_page(), change compat_copy_strings()
to use get_arg_page(), change compat_do_execve() to do acct_arg_size(0)
as do_execve() does.

Add the fatal_signal_pending/cond_resched checks into compat_count() and
compat_copy_strings(), this matches the code in fs/exec.c and certainly
makes sense.

Signed-off-by: Oleg Nesterov <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
 fs/compat.c             |   28 +++++++++++++++-------------
 fs/exec.c               |    8 ++++----
 include/linux/binfmts.h |    4 ++++
 3 files changed, 23 insertions(+), 17 deletions(-)

Index: linux/fs/compat.c
===================================================================
--- linux.orig/fs/compat.c
+++ linux/fs/compat.c
@@ -1376,6 +1376,10 @@ static int compat_count(compat_uptr_t __
 			argv++;
 			if (i++ >= max)
 				return -E2BIG;
+
+			if (fatal_signal_pending(current))
+				return -ERESTARTNOHAND;
+			cond_resched();
 		}
 	}
 	return i;
@@ -1417,6 +1421,12 @@ static int compat_copy_strings(int argc,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			if (fatal_signal_pending(current)) {
+				ret = -ERESTARTNOHAND;
+				goto out;
+			}
+			cond_resched();
+
 			offset = pos % PAGE_SIZE;
 			if (offset == 0)
 				offset = PAGE_SIZE;
@@ -1433,18 +1443,8 @@ static int compat_copy_strings(int argc,
 			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
 				struct page *page;
 
-#ifdef CONFIG_STACK_GROWSUP
-				ret = expand_stack_downwards(bprm->vma, pos);
-				if (ret < 0) {
-					/* We've exceed the stack rlimit. */
-					ret = -E2BIG;
-					goto out;
-				}
-#endif
-				ret = get_user_pages(current, bprm->mm, pos,
-						     1, 1, 1, &page, NULL);
-				if (ret <= 0) {
-					/* We've exceed the stack rlimit. */
+				page = get_arg_page(bprm, pos, 1);
+				if (!page) {
 					ret = -E2BIG;
 					goto out;
 				}
@@ -1565,8 +1565,10 @@ int compat_do_execve(char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
+	if (bprm->mm) {
+		acct_arg_size(bprm, 0);
 		mmput(bprm->mm);
+	}
 
 out_file:
 	if (bprm->file) {
Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c
+++ linux/fs/exec.c
@@ -158,7 +158,7 @@ out:
 
 #ifdef CONFIG_MMU
 
-static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 	struct mm_struct *mm = current->mm;
 	long diff = (long)(pages - bprm->vma_pages);
@@ -173,7 +173,7 @@ static void acct_arg_size(struct linux_b
 	up_write(&mm->mmap_sem);
 }
 
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -287,11 +287,11 @@ static bool valid_arg_len(struct linux_b
 
 #else
 
-static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 }
 
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
Index: linux/include/linux/binfmts.h
===================================================================
--- linux.orig/include/linux/binfmts.h
+++ linux/include/linux/binfmts.h
@@ -60,6 +60,10 @@ struct linux_binprm{
 	unsigned long loader, exec;
 };
 
+extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages);
+extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+					int write);
+
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT)
 
_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to