On Fri, May 15, 2009 at 04:02:16PM +0200, Leonid Chaichenets wrote:
> Hello list,
>
> I've modified the patch by Ryan Finnie to successfully patch against
> stock ubuntu sources (package: linux-source-2.6.28).
Okay, I should include the actual patch...
--- arch/um/include/asm/compat.h
+++ arch/um/include/asm/compat.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_64_COMPAT_H
+#define _ASM_X86_64_COMPAT_H
+
+typedef s32 compat_int_t;
+typedef s32 compat_long_t;
+typedef u32 compat_uint_t;
+typedef u32 compat_ulong_t;
+
+#endif /* _ASM_X86_64_COMPAT_H */
--- arch/um/include/asm/proc_mm_32.h
+++ arch/um/include/asm/proc_mm_32.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_PROC_MM
+#define __ASM_PROC_MM
+
+#include <asm/page.h>
+
+extern long do_mmap2(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff);
+
+static inline long __do_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd,
+ unsigned long off)
+{
+ return do_mmap2(mm, addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+#endif /* __ASM_PROC_MM */
--- arch/um/include/asm/proc_mm_64.h
+++ arch/um/include/asm/proc_mm_64.h
@@ -0,0 +1,58 @@
+#ifndef __ASM_PROC_MM
+#define __ASM_PROC_MM
+#include <linux/types.h>
+
+#include <asm/compat.h>
+
+struct mm_mmap32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_ulong_t prot;
+ compat_ulong_t flags;
+ compat_ulong_t fd;
+ compat_ulong_t offset;
+};
+
+struct mm_munmap32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+};
+
+struct mm_mprotect32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_uint_t prot;
+};
+
+struct proc_mm_op32 {
+ compat_int_t op;
+ union {
+ struct mm_mmap32 mmap;
+ struct mm_munmap32 munmap;
+ struct mm_mprotect32 mprotect;
+ compat_int_t copy_segments;
+ } u;
+};
+
+extern ssize_t write_proc_mm_emul(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos);
+
+extern struct mm_struct *proc_mm_get_mm64(int fd);
+
+extern long do64_mmap(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off);
+
+static inline long __do_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd,
+ unsigned long off)
+{
+ /* The latter one (do64_mmap) is stricter, since it will actually check that
+ * off is page aligned. The first one (do32_mmap2) skipped the check. */
+
+ /* return do32_mmap2(mm, addr, len, prot, flags, fd, off >>
+ * PAGE_SHIFT);*/
+ return do64_mmap(mm, addr, len, prot, flags, fd, off);
+}
+
+#endif /* __ASM_PROC_MM */
--- arch/um/sys-i386/ldt.c
+++ arch/um/sys-i386/ldt.c
@@ -7,7 +7,7 @@
#include <linux/sched.h>
#include <asm/unistd.h>
#include "os.h"
-#include "proc_mm.h"
+#include "linux/proc_mm.h"
#include "skas.h"
#include "skas_ptrace.h"
#include "sysdep/tls.h"
--- arch/x86/ia32/sys_ia32.c
+++ arch/x86/ia32/sys_ia32.c
@@ -562,11 +562,10 @@
return ret;
}
-asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
+long do32_mmap2(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
{
- struct mm_struct *mm = current->mm;
unsigned long error;
struct file *file = NULL;
@@ -578,7 +577,7 @@
}
down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
up_write(&mm->mmap_sem);
if (file)
@@ -586,6 +585,15 @@
return error;
}
+/* XXX: this wrapper can probably be removed; we can simply use the 64-bit
+ * version.*/
+asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+{
+ return do32_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
+}
+
asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
{
char *arch = "x86_64";
--- arch/x86/include/asm/desc.h
+++ arch/x86/include/asm/desc.h
@@ -44,6 +44,9 @@
return per_cpu(gdt_page, cpu).gdt;
}
+extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr,
+ unsigned long bytecount);
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
--- arch/x86/include/asm/mmu_context_32.h
+++ arch/x86/include/asm/mmu_context_32.h
@@ -16,6 +16,10 @@
{
int cpu = smp_processor_id();
+#ifdef CONFIG_SMP
+ prev = per_cpu(cpu_tlbstate, cpu).active_mm;
+#endif
+
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
@@ -37,7 +41,6 @@
#ifdef CONFIG_SMP
else {
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
- BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
--- arch/x86/include/asm/mmu_context_64.h
+++ arch/x86/include/asm/mmu_context_64.h
@@ -15,6 +15,9 @@
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
+#ifdef CONFIG_SMP
+ prev = read_pda(active_mm);
+#endif
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
@@ -31,8 +34,6 @@
#ifdef CONFIG_SMP
else {
write_pda(mmu_state, TLBSTATE_OK);
- if (read_pda(active_mm) != next)
- BUG();
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
--- arch/x86/include/asm/mmu_context.h
+++ arch/x86/include/asm/mmu_context.h
@@ -16,8 +16,21 @@
#endif /* !CONFIG_PARAVIRT */
/*
- * Used for LDT copy/destruction.
+ * Used for LDT initialization/destruction. You cannot copy an LDT with
+ * init_new_context, since it thinks you are passing it a new LDT and won't
+ * deallocate its old content.
*/
+
+/* LDT initialization for a clean environment - needed for SKAS.*/
+static inline void init_new_empty_context(struct mm_struct *mm)
+{
+ mutex_init(&mm->context.lock);
+ mm->context.size = 0;
+}
+
+/* LDT copy for SKAS - for the above problem.*/
+int copy_context(struct mm_struct *mm, struct mm_struct *old_mm);
+
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
--- arch/x86/include/asm/proc_mm_32.h
+++ arch/x86/include/asm/proc_mm_32.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_PROC_MM
+#define __ASM_PROC_MM
+
+#include <asm/page.h>
+
+extern long do_mmap2(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff);
+
+static inline long __do_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd,
+ unsigned long off)
+{
+ return do_mmap2(mm, addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+#endif /* __ASM_PROC_MM */
--- arch/x86/include/asm/proc_mm_64.h
+++ arch/x86/include/asm/proc_mm_64.h
@@ -0,0 +1,58 @@
+#ifndef __ASM_PROC_MM
+#define __ASM_PROC_MM
+#include <linux/types.h>
+
+#include <asm/compat.h>
+
+struct mm_mmap32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_ulong_t prot;
+ compat_ulong_t flags;
+ compat_ulong_t fd;
+ compat_ulong_t offset;
+};
+
+struct mm_munmap32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+};
+
+struct mm_mprotect32 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_uint_t prot;
+};
+
+struct proc_mm_op32 {
+ compat_int_t op;
+ union {
+ struct mm_mmap32 mmap;
+ struct mm_munmap32 munmap;
+ struct mm_mprotect32 mprotect;
+ compat_int_t copy_segments;
+ } u;
+};
+
+extern ssize_t write_proc_mm_emul(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos);
+
+extern struct mm_struct *proc_mm_get_mm64(int fd);
+
+extern long do64_mmap(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off);
+
+static inline long __do_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd,
+ unsigned long off)
+{
+ /* The latter one (do64_mmap) is stricter, since it will actually check that
+ * off is page aligned. The first one (do32_mmap2) skipped the check. */
+
+ /* return do32_mmap2(mm, addr, len, prot, flags, fd, off >>
+ * PAGE_SHIFT);*/
+ return do64_mmap(mm, addr, len, prot, flags, fd, off);
+}
+
+#endif /* __ASM_PROC_MM */
--- arch/x86/include/asm/ptrace-abi.h
+++ arch/x86/include/asm/ptrace-abi.h
@@ -65,6 +65,12 @@
#define PTRACE_GETFPXREGS 18
#define PTRACE_SETFPXREGS 19
+#define PTRACE_FAULTINFO 52
+/* 53 was used for PTRACE_SIGPENDING, don't reuse it. */
+#define PTRACE_LDT 54
+#define PTRACE_SWITCH_MM 55
+#define PTRACE_EX_FAULTINFO 56
+
#define PTRACE_OLDSETOPTIONS 21
/* only useful for access 32bit programs / kernels */
--- arch/x86/include/asm/ptrace.h
+++ arch/x86/include/asm/ptrace.h
@@ -275,4 +275,69 @@
#endif /* !__ASSEMBLY__ */
+/*For SKAS3 support.*/
+#ifndef _LINUX_PTRACE_STRUCT_DEF
+#define _LINUX_PTRACE_STRUCT_DEF
+
+#define PTRACE_FAULTINFO 52
+/* 53 was used for PTRACE_SIGPENDING, don't reuse it. */
+#define PTRACE_LDT 54
+#define PTRACE_SWITCH_MM 55
+#define PTRACE_EX_FAULTINFO 56
+
+struct ptrace_faultinfo {
+ int is_write;
+ unsigned long addr;
+};
+
+struct ptrace_ex_faultinfo {
+ int is_write;
+ unsigned long addr;
+ int trap_no;
+};
+
+struct ptrace_ldt {
+ int func;
+ void *ptr;
+ unsigned long bytecount;
+};
+
+#endif /*ifndef _LINUX_PTRACE_STRUCT_DEF*/
+
+/* Stolen from
+#include <linux/compat.h>; we can't include it because
+there is a nasty cyclic include chain.
+*/
+
+#include <asm/types.h>
+
+#define compat_int_t s32
+#define compat_long_t s32
+#define compat_uint_t u32
+#define compat_ulong_t u32
+#define compat_uptr_t u32
+
+struct ptrace_faultinfo32 {
+ compat_int_t is_write;
+ compat_ulong_t addr;
+};
+
+struct ptrace_ex_faultinfo32 {
+ compat_int_t is_write;
+ compat_ulong_t addr;
+ compat_int_t trap_no;
+};
+
+struct ptrace_ldt32 {
+ compat_int_t func;
+ compat_uptr_t ptr; /*Actually a void pointer on i386, but must be converted.*/
+ compat_ulong_t bytecount;
+};
+
+#undef compat_int_t
+#undef compat_long_t
+#undef compat_uint_t
+#undef compat_ulong_t
+#undef compat_uptr_t
+
#endif /* _ASM_X86_PTRACE_H */
--- arch/x86/Kconfig
+++ arch/x86/Kconfig
@@ -962,6 +962,27 @@
config ARCH_PHYS_ADDR_T_64BIT
def_bool X86_64 || X86_PAE
+config PROC_MM
+ bool "/proc/mm support"
+ default y
+
+config PROC_MM_DUMPABLE
+ bool "Make UML childs /proc/<pid> completely browsable"
+ default n
+ depends on PROC_MM
+ help
+ If in doubt, say N.
+
+ This fiddles with some settings to make sure /proc/<pid> is completely
+ browsable by whoever started UML, at the expense of some additional
+ locking (this might slow down the running UMLs by a few percent;
+ I've not tested this).
+
+ Also, if there is a bug in this feature, there is a small possibility
+ of privilege escalation if you have UML installed setuid (which you
+ shouldn't have done) or if UML changes uid on startup (which will be
+ a good thing, when enabled) ...
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
--- arch/x86/kernel/ldt.c
+++ arch/x86/kernel/ldt.c
@@ -28,10 +28,11 @@
}
#endif
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+static int alloc_ldt(struct mm_struct *mm, int mincount, int reload)
{
void *oldldt, *newldt;
int oldsize;
+ mm_context_t * pc = &mm->context;
if (mincount <= pc->size)
return 0;
@@ -66,13 +67,15 @@
if (reload) {
#ifdef CONFIG_SMP
preempt_disable();
- load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask,
+ if (&current->active_mm->context == pc)
+ load_LDT(pc);
+ if (!cpus_equal(mm->cpu_vm_mask,
cpumask_of_cpu(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
- load_LDT(pc);
+ if (&current->active_mm->context == pc)
+ load_LDT(pc);
#endif
}
if (oldsize) {
@@ -85,16 +88,16 @@
return 0;
}
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+static inline int copy_ldt(struct mm_struct *new, struct mm_struct *old)
{
- int err = alloc_ldt(new, old->size, 0);
+ int err = alloc_ldt(new, old->context.size, 0);
int i;
if (err < 0)
return err;
- for(i = 0; i < old->size; i++)
- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
+ for(i = 0; i < old->context.size; i++)
+ write_ldt_entry(new->context.ldt, i, old->context.ldt + i * LDT_ENTRY_SIZE);
return 0;
}
@@ -102,22 +105,24 @@
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+int copy_context(struct mm_struct *mm, struct mm_struct *old_mm)
{
- struct mm_struct *old_mm;
int retval = 0;
- mutex_init(&mm->context.lock);
- mm->context.size = 0;
- old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
mutex_lock(&old_mm->context.lock);
- retval = copy_ldt(&mm->context, &old_mm->context);
+ retval = copy_ldt(mm, old_mm);
mutex_unlock(&old_mm->context.lock);
}
return retval;
}
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ init_new_empty_context(mm);
+ return copy_context(mm, current->mm);
+}
+
/*
* No need to lock the MM as we are the last user
*
@@ -140,11 +145,10 @@
}
}
-static int read_ldt(void __user *ptr, unsigned long bytecount)
+static int read_ldt(struct mm_struct * mm, void __user * ptr, unsigned long bytecount)
{
int err;
unsigned long size;
- struct mm_struct *mm = current->mm;
if (!mm->context.size)
return 0;
@@ -189,9 +193,8 @@
return bytecount;
}
-static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+static int write_ldt(struct mm_struct * mm, void __user *ptr, unsigned long bytecount, int oldmode)
{
- struct mm_struct *mm = current->mm;
struct desc_struct ldt;
int error;
struct user_desc ldt_info;
@@ -215,8 +218,7 @@
mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= mm->context.size) {
- error = alloc_ldt(&current->mm->context,
- ldt_info.entry_number + 1, 1);
+ error = alloc_ldt(mm, ldt_info.entry_number+1, 1);
if (error < 0)
goto out_unlock;
}
@@ -244,24 +246,32 @@
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, unsigned long bytecount)
{
int ret = -ENOSYS;
switch (func) {
case 0:
- ret = read_ldt(ptr, bytecount);
+ ret = read_ldt(mm, ptr, bytecount);
break;
case 1:
- ret = write_ldt(ptr, bytecount, 1);
+ ret = write_ldt(mm, ptr, bytecount, 1);
break;
case 2:
ret = read_default_ldt(ptr, bytecount);
break;
case 0x11:
- ret = write_ldt(ptr, bytecount, 0);
+ ret = write_ldt(mm, ptr, bytecount, 0);
break;
}
return ret;
}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+ int ret = __modify_ldt(current->mm, func, ptr, bytecount);
+ /* A tail call would reorder parameters on the stack and they would then
+ * be restored at the wrong places. */
+ asmlinkage_protect(0, ret);
+ return ret;
+}
--- arch/x86/kernel/ptrace.c
+++ arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/proc_mm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1130,6 +1131,68 @@
break;
#endif /* CONFIG_X86_PTRACE_BTS */
+#ifdef CONFIG_PROC_MM
+ case PTRACE_EX_FAULTINFO: {
+ struct ptrace_ex_faultinfo fault;
+
+ fault = ((struct ptrace_ex_faultinfo)
+ { .is_write = child->thread.error_code,
+ .addr = child->thread.cr2,
+ .trap_no = child->thread.trap_no });
+ ret = copy_to_user((unsigned long *) data, &fault,
+ sizeof(fault));
+ break;
+ }
+
+#ifndef CONFIG_X86_64
+ case PTRACE_FAULTINFO: {
+ struct ptrace_faultinfo fault;
+
+ fault = ((struct ptrace_faultinfo)
+ { .is_write = child->thread.error_code,
+ .addr = child->thread.cr2 });
+ ret = copy_to_user((unsigned long *) data, &fault,
+ sizeof(fault));
+ break;
+ }
+#endif
+
+ case PTRACE_LDT: {
+ struct ptrace_ldt ldt;
+
+ if(copy_from_user(&ldt, (unsigned long *) data,
+ sizeof(ldt))){
+ ret = -EIO;
+ break;
+ }
+ ret = __modify_ldt(child->mm, ldt.func, ldt.ptr, ldt.bytecount);
+ break;
+ }
+
+ case PTRACE_SWITCH_MM: {
+ struct mm_struct *old = child->mm;
+ struct mm_struct *new = proc_mm_get_mm(data);
+
+ if(IS_ERR(new)){
+ ret = PTR_ERR(new);
+ break;
+ }
+
+ atomic_inc(&new->mm_users);
+
+ lock_fix_dumpable_setting(child, new);
+
+ child->mm = new;
+ child->active_mm = new;
+
+ task_unlock(child);
+
+ mmput(old);
+ ret = 0;
+ break;
+ }
+#endif
+
default:
ret = ptrace_request(child, request, addr, data);
break;
--- arch/x86/kernel/sys_i386_32.c
+++ arch/x86/kernel/sys_i386_32.c
@@ -18,19 +18,20 @@
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/ipc.h>
+#include <asm/proc_mm_32.h>
#include <linux/uaccess.h>
#include <linux/unistd.h>
#include <asm/syscalls.h>
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+
+long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
{
int error = -EBADF;
struct file *file = NULL;
- struct mm_struct *mm = current->mm;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {
@@ -40,7 +41,7 @@
}
down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
up_write(&mm->mmap_sem);
if (file)
@@ -49,6 +50,18 @@
return error;
}
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+{
+ long ret = do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
+
+ /* A tail call would reorder parameters on the stack and they would then
+ * be restored at the wrong places. */
+ asmlinkage_protect(0, ret);
+ return ret;
+}
+
/*
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls. Linux/i386 didn't use to be able to handle more than
@@ -77,8 +90,11 @@
if (a.offset & ~PAGE_MASK)
goto out;
- err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+ err = do_mmap2(current->mm, a.addr, a.len, a.prot, a.flags,
a.fd, a.offset >> PAGE_SHIFT);
+ /* A tail call would reorder parameters on the stack and they would then
+ * be restored at the wrong places. */
+ asmlinkage_protect(0, err);
out:
return err;
}
--- arch/x86/kernel/sys_x86_64.c
+++ arch/x86/kernel/sys_x86_64.c
@@ -17,8 +17,9 @@
#include <asm/ia32.h>
#include <asm/syscalls.h>
+#include <asm/proc_mm_64.h>
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+long do64_mmap(struct mm_struct *mm, unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off)
{
@@ -37,9 +38,9 @@
if (!file)
goto out;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
- up_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, off >> PAGE_SHIFT);
+ up_write(&mm->mmap_sem);
if (file)
fput(file);
@@ -47,6 +48,12 @@
return error;
}
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off)
+{
+ return do64_mmap(current->mm, addr, len, prot, flags, fd, off);
+}
+
static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
--- arch/x86/mm/Makefile
+++ arch/x86/mm/Makefile
@@ -17,4 +17,8 @@
obj-$(CONFIG_K8_NUMA) += k8topology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
+ifneq ($(CONFIG_X86_32),y)
+obj-$(CONFIG_PROC_MM) += proc_mm_64.o
+endif
+
obj-$(CONFIG_MEMTEST) += memtest.o
--- arch/x86/mm/proc_mm_64.c
+++ arch/x86/mm/proc_mm_64.c
@@ -0,0 +1,85 @@
+#include <linux/proc_mm.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+
+ssize_t write_proc_mm_emul(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct mm_struct *mm = file->private_data;
+ struct proc_mm_op32 req;
+ int n, ret;
+
+ if(count > sizeof(req))
+ return(-EINVAL);
+
+ n = copy_from_user(&req, buffer, count);
+ if(n != 0)
+ return(-EFAULT);
+
+ ret = count;
+ switch(req.op){
+ case MM_MMAP: {
+ struct mm_mmap32 *map = &req.u.mmap;
+
+ /* Nobody ever noticed it, but do_mmap_pgoff() calls
+ * get_unmapped_area() which checks current->mm, if
+ * MAP_FIXED is not set, so mmap() could replace
+ * an old mapping.
+ */
+ if (! (map->flags & MAP_FIXED))
+ return(-EINVAL);
+
+ ret = __do_mmap(mm, map->addr, map->len, map->prot,
+ map->flags, map->fd, map->offset);
+ if((ret & ~PAGE_MASK) == 0)
+ ret = count;
+
+ break;
+ }
+ case MM_MUNMAP: {
+ struct mm_munmap32 *unmap = &req.u.munmap;
+
+ down_write(&mm->mmap_sem);
+ ret = do_munmap(mm, unmap->addr, unmap->len);
+ up_write(&mm->mmap_sem);
+
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+ case MM_MPROTECT: {
+ struct mm_mprotect32 *protect = &req.u.mprotect;
+
+ ret = do_mprotect(mm, protect->addr, protect->len,
+ protect->prot);
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+
+ case MM_COPY_SEGMENTS: {
+ struct mm_struct *from = proc_mm_get_mm_emul(req.u.copy_segments);
+
+ if(IS_ERR(from)){
+ ret = PTR_ERR(from);
+ break;
+ }
+
+ ret = copy_context(mm, from);
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
--- include/linux/mm.h
+++ include/linux/mm.h
@@ -5,6 +5,7 @@
#ifdef __KERNEL__
+#include <linux/proc_mm.h>
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mmdebug.h>
@@ -1119,11 +1120,18 @@
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+extern unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flag,
+ unsigned long pgoff);
+static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flag, unsigned long pgoff);
-extern unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, unsigned long flags,
+ unsigned long flag, unsigned long pgoff) {
+ return __do_mmap_pgoff(current->mm, file, addr, len, prot, flag, pgoff);
+}
+
+extern unsigned long mmap_region(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, unsigned long flags,
unsigned int vm_flags, unsigned long pgoff,
int accountable);
@@ -1142,6 +1150,9 @@
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+extern long do_mprotect(struct mm_struct *mm, unsigned long start,
+ size_t len, unsigned long prot);
+
extern unsigned long do_brk(unsigned long, unsigned long);
/* filemap.c */
--- include/linux/proc_mm.h
+++ include/linux/proc_mm.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jd...@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __PROC_MM_H
+#define __PROC_MM_H
+
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+/* The differences between this one and do_mmap are that:
+ * - we must perform controls for userspace-supplied params (which are
+ * arch-specific currently). And also fget(fd) if needed and so on...
+ * - we must accept the struct mm_struct on which to act as first param, and the
+ * offset in byte rather than page units as last param.
+ */
+static inline long __do_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd,
+ unsigned long off);
+
+/*XXX: this is defined on x86_64, but not on every 64-bit arch (not on sh64).*/
+#ifdef CONFIG_64BIT
+
+#include <asm/proc_mm_64.h>
+#define write_proc_mm write_proc_mm_emul
+#define write_proc_mm64 write_proc_mm_native
+
+/* It would make more sense to do this mapping the reverse direction, to map the
+ * called name to the defined one and not the reverse. Like the 2nd example
+ */
+/*#define proc_mm_get_mm proc_mm_get_mm_emul
+#define proc_mm_get_mm64 proc_mm_get_mm_native*/
+
+#define proc_mm_get_mm_emul proc_mm_get_mm
+#define proc_mm_get_mm_native proc_mm_get_mm64
+
+#else
+
+#include <asm/proc_mm_32.h>
+#define write_proc_mm write_proc_mm_native
+#undef write_proc_mm64
+
+/*#define proc_mm_get_mm proc_mm_get_mm_native
+#undef proc_mm_get_mm64*/
+
+#define proc_mm_get_mm_native proc_mm_get_mm
+#undef proc_mm_get_mm_emul
+
+#endif
+
+#define MM_MMAP 54
+#define MM_MUNMAP 55
+#define MM_MPROTECT 56
+#define MM_COPY_SEGMENTS 57
+
+struct mm_mmap {
+ unsigned long addr;
+ unsigned long len;
+ unsigned long prot;
+ unsigned long flags;
+ unsigned long fd;
+ unsigned long offset;
+};
+
+struct mm_munmap {
+ unsigned long addr;
+ unsigned long len;
+};
+
+struct mm_mprotect {
+ unsigned long addr;
+ unsigned long len;
+ unsigned int prot;
+};
+
+struct proc_mm_op {
+ int op;
+ union {
+ struct mm_mmap mmap;
+ struct mm_munmap munmap;
+ struct mm_mprotect mprotect;
+ int copy_segments;
+ } u;
+};
+
+extern struct mm_struct *proc_mm_get_mm(int fd);
+
+/* Cope with older kernels */
+#ifndef __acquires
+#define __acquires(x)
+#endif
+
+#ifdef CONFIG_PROC_MM_DUMPABLE
+/*
+ * Since we take task_lock of child and it's needed also by the caller, we
+ * return with it locked.
+ */
+extern void lock_fix_dumpable_setting(struct task_struct * child,
+ struct mm_struct* new) __acquires(child->alloc_lock);
+#else
+static inline void lock_fix_dumpable_setting(struct task_struct * child,
+ struct mm_struct* new) __acquires(child->alloc_lock)
+{
+ task_lock(child);
+}
+#endif
+
+#endif
--- mm/fremap.c
+++ mm/fremap.c
@@ -197,7 +197,7 @@
flags &= MAP_NONBLOCK;
get_file(file);
- addr = mmap_region(file, start, size,
+ addr = mmap_region(current->mm, file, start, size,
flags, vma->vm_flags, pgoff, 1);
fput(file);
if (IS_ERR_VALUE(addr)) {
--- mm/Makefile
+++ mm/Makefile
@@ -32,5 +32,10 @@
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
+obj-$(CONFIG_PROC_MM) += proc_mm.o
+
+ifeq ($(CONFIG_PROC_MM),y)
+obj-m += proc_mm-mod.o
+endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
--- mm/mmap.c
+++ mm/mmap.c
@@ -907,12 +907,11 @@
/*
* The caller must hold down_write(current->mm->mmap_sem).
*/
-
-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
- unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff)
+unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file * file,
+ unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long pgoff)
{
- struct mm_struct * mm = current->mm;
struct inode *inode;
unsigned int vm_flags;
int error;
@@ -1051,10 +1050,10 @@
if (error)
return error;
- return mmap_region(file, addr, len, flags, vm_flags, pgoff,
+ return mmap_region(mm, file, addr, len, flags, vm_flags, pgoff,
accountable);
}
-EXPORT_SYMBOL(do_mmap_pgoff);
+EXPORT_SYMBOL(__do_mmap_pgoff);
/*
* Some shared mappigns will want the pages marked read-only
@@ -1088,12 +1087,12 @@
mapping_cap_account_dirty(vma->vm_file->f_mapping);
}
-unsigned long mmap_region(struct file *file, unsigned long addr,
+unsigned long mmap_region(struct mm_struct *mm,
+ struct file *file, unsigned long addr,
unsigned long len, unsigned long flags,
unsigned int vm_flags, unsigned long pgoff,
int accountable)
{
- struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct vm_area_struct *merged_vma;
int correct_wcount = 0;
--- mm/mprotect.c
+++ mm/mprotect.c
@@ -219,8 +219,9 @@
return error;
}
-SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
- unsigned long, prot)
+long
+do_mprotect(struct mm_struct *mm, unsigned long start, size_t len,
+ unsigned long prot)
{
unsigned long vm_flags, nstart, end, tmp, reqprot;
struct vm_area_struct *vma, *prev;
@@ -250,9 +251,9 @@
vm_flags = calc_vm_prot_bits(prot);
- down_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
- vma = find_vma_prev(current->mm, start, &prev);
+ vma = find_vma_prev(mm, start, &prev);
error = -ENOMEM;
if (!vma)
goto out;
@@ -314,6 +315,15 @@
}
}
out:
- up_write(&current->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
return error;
}
+
+SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, unsigned long, prot)
+{
+ long ret = do_mprotect(current->mm, start, len, prot);
+ /* A tail call would reorder parameters on the stack and they would then
+ * be restored at the wrong places. */
+ asmlinkage_protect(0, ret);
+ return ret;
+}
--- mm/proc_mm.c
+++ mm/proc_mm.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jd...@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/compiler.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/proc_mm.h>
+#include <linux/file.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_PROC_MM_DUMPABLE
+/* Report whether @task is dumpable: task->mm->dumpable, or 0 if it has no mm.
+ *
+ * XXX: copied from fs/proc/base.c, removed task_lock, added rmb(): this must be
+ * called with task_lock(task) held. */
+static int task_dumpable(struct task_struct *task)
+{
+ int dumpable = 0;
+ struct mm_struct *mm;
+
+ mm = task->mm;
+ if (mm) {
+ rmb(); /* presumably pairs with the wmb() issued after stores to ->dumpable */
+ dumpable = mm->dumpable;
+ }
+ return dumpable;
+}
+
+/*
+ * This is to be used in PTRACE_SWITCH_MM handling. We are going to set
+ * child->mm to new, and we must first correctly set new->dumpable.
+ * Since we take task_lock of child and it's needed also by the caller, we
+ * return with it locked.
+ */
+void lock_fix_dumpable_setting(struct task_struct* child, struct mm_struct* new)
+ __acquires(child->alloc_lock)
+{
+ int dumpable = 1; /* stays 1 only if both current and child are dumpable */
+
+ /* We must be safe.
+ * If the child is ptraced from a non-dumpable process,
+ * let's not be dumpable. If the child is non-dumpable itself,
+ * copy this property across mm's.
+ *
+ * Don't try to be smart for the opposite case and turn
+ * child->mm->dumpable to 1: I've not made sure it is safe.
+ */
+
+ task_lock(current); /* task_dumpable() requires the task lock to be held */
+ if (unlikely(!task_dumpable(current))) {
+ dumpable = 0;
+ }
+ task_unlock(current);
+
+ task_lock(child); /* deliberately not released here: the caller must task_unlock(child) */
+ if (likely(dumpable) && unlikely(!task_dumpable(child))) {
+ dumpable = 0;
+ }
+
+ if (!dumpable) {
+ new->dumpable = 0; /* only ever cleared; a dumpable verdict leaves new->dumpable untouched */
+ wmb();
+ }
+}
+#endif
+
+/* Naming conventions are a mess, so I note them down.
+ *
+ * Things ending in _mm can be for everything. It's only for
+ * {open,release}_proc_mm.
+ *
+ * For the rest:
+ *
+ * _mm means /proc/mm, _mm64 means /proc/mm64. This is for the infrastructure
+ * only (for instance proc_mm_get_mm checks whether the file is /proc/mm or
+ * /proc/mm64; for instance the /proc handling).
+ *
+ * For what is conversion dependent, we use the suffixes _native and _emul.
+ * In some cases, there is a mapping between these ones (defined by
+ * <asm/proc_mm.h>).
+ */
+
+/*These two are common to everything.*/
+static int open_proc_mm(struct inode *inode, struct file *file)
+{
+ struct mm_struct *mm = mm_alloc(); /* new mm; dropped by mmput() in release_proc_mm() */
+ int ret;
+
+ ret = -ENOMEM;
+ if(mm == NULL)
+ goto out_mem;
+
+ init_new_empty_context(mm);
+ arch_pick_mmap_layout(mm);
+#ifdef CONFIG_PROC_MM_DUMPABLE
+ mm->dumpable = current->mm->dumpable; /* start out with the opener's dumpable setting */
+ wmb();
+#endif
+
+ file->private_data = mm; /* later operations on this file fetch the mm from here */
+
+ return 0;
+
+out_mem:
+ return ret; /* only reached with ret == -ENOMEM */
+}
+
+static int release_proc_mm(struct inode *inode, struct file *file)
+{
+ struct mm_struct *mm = file->private_data; /* the mm stored by open_proc_mm() */
+
+ mmput(mm); /* drop the reference held on behalf of this open file */
+ return 0;
+}
+
+static struct file_operations proc_mm_fops; /* forward declaration; initialised further down, needed by proc_mm_get_mm() */
+
+struct mm_struct *proc_mm_get_mm_native(int fd); /* used by the MM_COPY_SEGMENTS case below; not defined in this file */
+
+static ssize_t write_proc_mm_native(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct mm_struct *mm = file->private_data; /* target mm, set up by open_proc_mm() */
+ struct proc_mm_op req = { 0 }; /* zeroed so a short write never leaves stack garbage in op/u */
+ int n, ret;
+
+ if(count > sizeof(req)) /* oversized requests are rejected; shorter ones are accepted */
+ return(-EINVAL);
+
+ n = copy_from_user(&req, buffer, count);
+ if(n != 0)
+ return(-EFAULT);
+
+ ret = count; /* on success every operation reports the whole request as written */
+ switch(req.op){
+ case MM_MMAP: {
+ struct mm_mmap *map = &req.u.mmap;
+
+ /* Nobody ever noticed it, but do_mmap_pgoff() calls
+ * get_unmapped_area() which checks current->mm, if
+ * MAP_FIXED is not set, so mmap() could replace
+ * an old mapping.
+ */
+ if (! (map->flags & MAP_FIXED))
+ return(-EINVAL);
+
+ ret = __do_mmap(mm, map->addr, map->len, map->prot,
+ map->flags, map->fd, map->offset);
+ if((ret & ~PAGE_MASK) == 0) /* page-aligned value is treated as an address, i.e. success */
+ ret = count;
+
+ break;
+ }
+ case MM_MUNMAP: {
+ struct mm_munmap *unmap = &req.u.munmap;
+
+ down_write(&mm->mmap_sem); /* do_munmap() is called with the target mm's mmap_sem held */
+ ret = do_munmap(mm, unmap->addr, unmap->len);
+ up_write(&mm->mmap_sem);
+
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+ case MM_MPROTECT: {
+ struct mm_mprotect *protect = &req.u.mprotect;
+
+ ret = do_mprotect(mm, protect->addr, protect->len,
+ protect->prot);
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+
+ case MM_COPY_SEGMENTS: {
+ struct mm_struct *from = proc_mm_get_mm_native(req.u.copy_segments); /* fd naming the source mm */
+
+ if(IS_ERR(from)){
+ ret = PTR_ERR(from);
+ break;
+ }
+
+ ret = copy_context(mm, from);
+ if(ret == 0)
+ ret = count;
+ break;
+ }
+ default: /* unknown opcode, including op == 0 from an empty write */
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/*These three are all for /proc/mm.*/
+struct mm_struct *proc_mm_get_mm(int fd)
+{
+ struct mm_struct *ret = ERR_PTR(-EBADF);
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ goto out; /* fd is not an open descriptor: ERR_PTR(-EBADF) */
+
+ ret = ERR_PTR(-EINVAL);
+ if(PDE(file->f_path.dentry->d_inode)->proc_fops != &proc_mm_fops)
+ goto out_fput; /* not the /proc/mm entry: ERR_PTR(-EINVAL) */
+
+ ret = file->private_data; /* the mm stored by open_proc_mm() */
+out_fput:
+ fput(file); /* NOTE(review): no mm reference is taken before the file is dropped - confirm callers keep fd open */
+out:
+ return(ret);
+}
+
+static struct file_operations proc_mm_fops = { /* file operations installed on /proc/mm by make_proc_mm() */
+ .open = open_proc_mm,
+ .release = release_proc_mm,
+ .write = write_proc_mm, /* not defined in this file; presumably mapped to _native/_emul by <asm/proc_mm.h> */
+};
+
+/* Create the /proc/mm entry. TODO: macro-ify it to avoid the duplication. */
+static int make_proc_mm(void)
+{
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("mm", 0222, NULL); /* mode 0222: write-only for everyone */
+ if(ent == NULL){
+ printk("make_proc_mm : Failed to register /proc/mm\n");
+ return(0); /* NOTE(review): failure is only logged; the initcall still reports success */
+ }
+ ent->proc_fops = &proc_mm_fops;
+
+ return 0;
+}
+
+__initcall(make_proc_mm);
+
+/*XXX: change the option.*/
+#ifdef CONFIG_64BIT
+static struct file_operations proc_mm64_fops = { /* file operations installed on /proc/mm64 by make_proc_mm64() */
+ .open = open_proc_mm, /* open/release are shared with /proc/mm */
+ .release = release_proc_mm,
+ .write = write_proc_mm64, /* not defined in this file; presumably provided via <asm/proc_mm.h> */
+};
+
+static int make_proc_mm64(void) /* create the /proc/mm64 entry at init time */
+{
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("mm64", 0222, NULL); /* mode 0222: write-only for everyone */
+ if(ent == NULL){
+ printk("make_proc_mm64 : Failed to register /proc/mm64\n");
+ return(0); /* NOTE(review): failure is only logged; the initcall still reports success */
+ }
+ ent->proc_fops = &proc_mm64_fops;
+
+ return 0;
+}
+
+__initcall(make_proc_mm64);
+
+struct mm_struct *proc_mm_get_mm64(int fd)
+{
+ struct mm_struct *ret = ERR_PTR(-EBADF);
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ goto out; /* fd is not an open descriptor: ERR_PTR(-EBADF) */
+
+ ret = ERR_PTR(-EINVAL);
+ /* Same test as proc_mm_get_mm(); file->f_op is procfs' wrapper, so compare the entry's proc_fops. */
+ if(PDE(file->f_path.dentry->d_inode)->proc_fops != &proc_mm64_fops)
+ goto out_fput; /* not the /proc/mm64 entry: ERR_PTR(-EINVAL) */
+
+ ret = file->private_data; /* the mm stored by open_proc_mm() */
+out_fput:
+ fput(file);
+out:
+ return(ret);
+}
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- mm/proc_mm-mod.c
+++ mm/proc_mm-mod.c
@@ -0,0 +1,50 @@
+#include <linux/kernel.h>
+#include <linux/proc_mm.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_64BIT /* 64-bit build: print member offsets within the 32-bit variants (struct <type>32) */
+#define PRINT_OFFSET(type, member) \
+ printk(KERN_DEBUG "struct " #type "32->" #member " \t: %ld\n", (long) offsetof(struct type ## 32, member))
+#else /* otherwise: print member offsets within the plain struct <type> */
+#define PRINT_OFFSET(type, member) \
+ printk(KERN_DEBUG "struct " #type "->" #member " \t: %ld\n", (long) offsetof(struct type, member))
+#endif
+
+static int debug_printoffsets(void) /* module init: log the member offsets of the SKAS structures at KERN_DEBUG */
+{
+ printk(KERN_DEBUG "Skas core structures layout BEGIN:\n");
+ PRINT_OFFSET(mm_mmap, addr); /* MM_MMAP request payload */
+ PRINT_OFFSET(mm_mmap, len);
+ PRINT_OFFSET(mm_mmap, prot);
+ PRINT_OFFSET(mm_mmap, flags);
+ PRINT_OFFSET(mm_mmap, fd);
+ PRINT_OFFSET(mm_mmap, offset);
+
+ PRINT_OFFSET(mm_munmap, addr); /* MM_MUNMAP request payload */
+ PRINT_OFFSET(mm_munmap, len);
+
+ PRINT_OFFSET(mm_mprotect, addr); /* MM_MPROTECT request payload */
+ PRINT_OFFSET(mm_mprotect, len);
+ PRINT_OFFSET(mm_mprotect, prot);
+
+ PRINT_OFFSET(proc_mm_op, op); /* the request wrapper: opcode plus union of payloads */
+ PRINT_OFFSET(proc_mm_op, u);
+ PRINT_OFFSET(proc_mm_op, u.mmap);
+ PRINT_OFFSET(proc_mm_op, u.munmap);
+ PRINT_OFFSET(proc_mm_op, u.mprotect);
+ PRINT_OFFSET(proc_mm_op, u.copy_segments);
+
+ PRINT_OFFSET(ptrace_faultinfo, is_write); /* ptrace-side structures */
+ PRINT_OFFSET(ptrace_faultinfo, addr);
+
+ PRINT_OFFSET(ptrace_ldt, func);
+ PRINT_OFFSET(ptrace_ldt, ptr);
+ PRINT_OFFSET(ptrace_ldt, bytecount);
+ printk(KERN_DEBUG "Skas core structures layout END.\n");
+
+ return 0; /* always succeeds; the function only prints */
+}
+#undef PRINT_OFFSET
+
+module_init(debug_printoffsets);
------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables
unlimited royalty-free distribution of the report engine
for externally facing server and web deployment.
http://p.sf.net/sfu/businessobjects
_______________________________________________
User-mode-linux-user mailing list
User-mode-linux-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-user