Exec-shield rebased against 2.6.28, merged by git and then fixed up by hand.

Build tested against x86-64 and PAE/non-PAE i386.

I'm uploading a scratch SRPM, but it will take a damned long time to upload, so I'll include the build ID in a reply.

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b1..6e03105 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -381,6 +381,22 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
        _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+       limit = (limit - 1) / PAGE_SIZE;
+       desc->a = limit & 0xffff;
+       desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+#define load_user_cs_desc(cpu, mm) \
+       get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
+
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+#endif /* CONFIG_X86_32 */
+
 #else
 /*
  * GET_DESC_BASE reads the descriptor base of the specified segment.
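
For anyone following along, here's what set_user_cs() above is packing (a userspace sketch, not part of the patch): the byte limit is converted to 4K pages and split across the two descriptor words, with 0x00c0fb00 supplying the fixed flag bits (4K granularity, 32-bit, present, DPL 3, code segment).

/* userspace demo of the set_user_cs() encoding from the hunk above */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static void set_user_cs_demo(unsigned int *a, unsigned int *b,
                             unsigned long limit)
{
        limit = (limit - 1) / PAGE_SIZE;        /* byte limit -> page limit */
        *a = limit & 0xffff;                    /* limit bits 0-15          */
        *b = (limit & 0xf0000) | 0x00c0fb00;    /* limit bits 16-19 + flags */
}

int main(void)
{
        unsigned int a, b;

        set_user_cs_demo(&a, &b, 16UL * 1024 * 1024);   /* 16MB exec limit  */
        printf("16MB limit: a=%08x b=%08x\n", a, b);

        set_user_cs_demo(&a, &b, -1UL);                 /* full 4GB segment */
        printf("-1UL limit: a=%08x b=%08x\n", a, b);
        return 0;
}
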
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 80a1dee..8314c66 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -7,12 +7,19 @@
 /*
  * The x86 doesn't have a mmu context, but
  * we put the segment information here.
+ *
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
  */
 typedef struct {
        void *ldt;
        int size;
        struct mutex lock;
        void *vdso;
+#ifdef CONFIG_X86_32
+       struct desc_struct user_cs;
+       unsigned long exec_limit;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e3..4f319b1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -154,6 +154,9 @@ static inline int hlt_works(int cpu)
 
 #define cache_line_size()      (boot_cpu_data.x86_cache_alignment)
 
+#define __HAVE_ARCH_ALIGN_STACK
+extern unsigned long arch_align_stack(unsigned long sp);
+
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
 extern struct pt_regs *idle_regs(struct pt_regs *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0..666dd0e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -687,6 +687,21 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
         * we do "generic changes."
         */
 
+       /*
+        *  emulation of NX with segment limits unfortunately means
+        *  we have to disable the fast system calls, due to the way that
+        *  sysexit clears the segment limits on return.
+        *  If we have either disabled exec-shield on the boot command line,
+        *  or we have NX, then we don't need to do this.
+        */
+       if (exec_shield != 0) {
+#ifdef CONFIG_X86_PAE
+               if (!test_cpu_cap(c, X86_FEATURE_NX))
+#endif
+                       clear_cpu_cap(c, X86_FEATURE_SEP);
+       }
+
+
        /* If the model name is still unset, do table lookup. */
        if (!c->x86_model_id[0]) {
                char *p;
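
The comment in that hunk is the crux: sysexit restores a flat CS on return to userspace, so a lowered segment limit would not survive the fast syscall path, and SEP has to go whenever we rely on limits instead of real NX. A quick userspace check of the two CPUID bits involved (just a sketch, not patch code):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* SEP: sysenter/sysexit support, CPUID leaf 1, EDX bit 11 */
        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                printf("SEP: %s\n", (edx & (1 << 11)) ? "yes" : "no");

        /* NX: execute-disable, CPUID leaf 0x80000001, EDX bit 20 */
        if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                printf("NX:  %s\n", (edx & (1 << 20)) ? "yes" : "no");
        return 0;
}
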
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302f..ab96a10 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -354,6 +354,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
        regs->cs                = __USER_CS;
        regs->ip                = new_ip;
        regs->sp                = new_sp;
+       preempt_disable();
+       load_user_cs_desc(smp_processor_id(), current->mm);
+       preempt_enable();
+
        /*
         * Free the old FP and other extended state
         */
@@ -558,7 +562,8 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
        __unlazy_fpu(prev_p);
-
+       if (next_p->mm)
+               load_user_cs_desc(cpu, next_p->mm);
 
        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
@@ -731,3 +736,39 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
+
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
+{
+       mm->context.exec_limit = limit;
+       set_user_cs(&mm->context.user_cs, limit);
+       if (mm == current->mm) {
+               preempt_disable();
+               load_user_cs_desc(smp_processor_id(), mm);
+               preempt_enable();
+       }
+}
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+       if (limit > mm->context.exec_limit)
+               modify_cs(mm, limit);
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+       struct vm_area_struct *vma;
+       unsigned long limit = PAGE_SIZE;
+
+       if (old_end == mm->context.exec_limit) {
+               for (vma = mm->mmap; vma; vma = vma->vm_next)
+                       if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+                               limit = vma->vm_end;
+               modify_cs(mm, limit);
+       }
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+       mm->context.exec_limit = 0;
+       set_user_cs(&mm->context.user_cs, 0);
+}
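
arch_remove_exec_range() is the subtle one: when the highest executable mapping goes away, the next-highest VM_EXEC end has to be found by rescanning the mapping list. A toy mock of that walk with a fake vma chain (userspace, not kernel code):

#include <stdio.h>

#define VM_EXEC   0x4UL
#define PAGE_SIZE 4096UL

struct mock_vma {
        unsigned long vm_end;
        unsigned long vm_flags;
        struct mock_vma *vm_next;
};

/* same loop as arch_remove_exec_range()/check_lazy_exec_limit() */
static unsigned long recompute_exec_limit(struct mock_vma *mmap)
{
        unsigned long limit = PAGE_SIZE;
        struct mock_vma *vma;

        for (vma = mmap; vma; vma = vma->vm_next)
                if ((vma->vm_flags & VM_EXEC) && vma->vm_end > limit)
                        limit = vma->vm_end;
        return limit;
}

int main(void)
{
        struct mock_vma text = { 0x08049000UL, VM_EXEC, NULL  };
        struct mock_vma lib  = { 0x00b00000UL, VM_EXEC, &text };
        struct mock_vma data = { 0x09000000UL, 0,       &lib  };

        /* data ends higher but isn't executable, so text's end wins */
        printf("exec limit: %08lx\n", recompute_exec_limit(&data));
        return 0;
}
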
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3..e7a1b7c 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -2,6 +2,7 @@
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
 
+#include <asm/desc.h>
 #include <asm/tlbflush.h>
 
 DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
@@ -92,6 +93,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
        unsigned long cpu;
 
        cpu = get_cpu();
+       if (current->active_mm)
+               load_user_cs_desc(cpu, current->active_mm);
 
        if (!cpu_isset(cpu, flush_cpumask))
                goto out;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242a..5f37540 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -159,6 +159,63 @@ static int lazy_iobitmap_copy(void)
 
        return 0;
 }
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+       struct desc_struct *desc1, *desc2;
+       struct vm_area_struct *vma;
+       unsigned long limit;
+
+       if (current->mm == NULL)
+               return 0;
+
+       limit = -1UL;
+       if (current->mm->context.exec_limit != -1UL) {
+               limit = PAGE_SIZE;
+               spin_lock(&current->mm->page_table_lock);
+               for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+                       if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+                               limit = vma->vm_end;
+               vma = get_gate_vma(current);
+               if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+                       limit = vma->vm_end;
+               spin_unlock(&current->mm->page_table_lock);
+               if (limit >= TASK_SIZE)
+                       limit = -1UL;
+               current->mm->context.exec_limit = limit;
+       }
+       set_user_cs(&current->mm->context.user_cs, limit);
+
+       desc1 = &current->mm->context.user_cs;
+       desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+       if ((desc1->a & 0xff0000ff) != (desc2->a & 0xff0000ff) ||
+           desc1->b != desc2->b) {
+               /*
+                * The CS was not in sync - reload it and retry the
+                * instruction. If the instruction still faults then
+                * we won't hit this branch next time around.
+                */
+               if (print_fatal_signals >= 2) {
+                       printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+                               error_code, error_code/8, regs->ip, smp_processor_id());
+                       printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
+                               current->mm->context.exec_limit, desc1->a, desc1->b, desc2->a, desc2->b);
+               }
+               load_user_cs_desc(cpu, current->mm);
+
+               return 1;
+       }
+
+       return 0;
+}
 #endif
 
 static void __kprobes
@@ -320,6 +377,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
        if (!user_mode(regs))
                goto gp_in_kernel;
 
+#ifdef CONFIG_X86_32
+{
+       int cpu;
+       int ok;
+
+       cpu = get_cpu();
+       ok = check_lazy_exec_limit(cpu, regs, error_code);
+       put_cpu();
+
+       if (ok)
+               return;
+
+       if (print_fatal_signals) {
+               printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+                       error_code, error_code/8, regs->ip, smp_processor_id());
+               printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
+                       current->mm->context.exec_limit,
+                       current->mm->context.user_cs.a,
+                       current->mm->context.user_cs.b);
+       }
+}
+#endif /*CONFIG_X86_32*/
+
        tsk->thread.error_code = error_code;
        tsk->thread.trap_no = 13;
 
@@ -931,19 +1011,37 @@ do_device_not_available(struct pt_regs *regs, long error)
 }
 
 #ifdef CONFIG_X86_32
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
-       siginfo_t info;
+       int ok;
+       int cpu;
+
        local_irq_enable();
 
-       info.si_signo = SIGILL;
-       info.si_errno = 0;
-       info.si_code = ILL_BADSTK;
-       info.si_addr = 0;
-       if (notify_die(DIE_TRAP, "iret exception",
-                       regs, error_code, 32, SIGILL) == NOTIFY_STOP)
-               return;
-       do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+       cpu = get_cpu();
+       ok = check_lazy_exec_limit(cpu, regs, error_code);
+       put_cpu();
+
+       if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+               error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+                       siginfo_t info;
+                       info.si_signo = SIGSEGV;
+                       info.si_errno = 0;
+                       info.si_code = ILL_BADSTK;
+                       info.si_addr = 0;
+                       do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
+       }
 }
 #endif
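
To make the lazy-fixup idea concrete, here's a toy version of the descriptor comparison (a userspace mock, not patch code). If the cached per-mm descriptor and the per-CPU GDT entry disagree, the GDT copy is stale, so we reload it and retry the faulting instruction; the 0xff0000ff mask on word a mirrors the hunk above.

#include <stdio.h>

struct mock_desc { unsigned int a, b; };

static int cs_desc_stale(const struct mock_desc *mm_cs,
                         const struct mock_desc *gdt_cs)
{
        return (mm_cs->a & 0xff0000ff) != (gdt_cs->a & 0xff0000ff) ||
               mm_cs->b != gdt_cs->b;
}

int main(void)
{
        struct mock_desc mm_cs  = { 0x0000ffff, 0x00cffb00 };   /* 4GB  */
        struct mock_desc gdt_cs = { 0x00000fff, 0x00c0fb00 };   /* 16MB */

        printf("stale: %d\n", cs_desc_stale(&mm_cs, &gdt_cs));  /* 1 */
        gdt_cs = mm_cs;                 /* what load_user_cs_desc() does */
        printf("stale: %d\n", cs_desc_stale(&mm_cs, &gdt_cs));  /* 0 */
        return 0;
}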
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f42..8c3bbd9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -570,7 +570,7 @@ static int disable_nx __initdata;
  * Control non executable mappings.
  *
  * on      Enable
- * off     Disable
+ * off     Disable (disables exec-shield too)
  */
 static int __init noexec_setup(char *str)
 {
@@ -579,14 +579,12 @@ static int __init noexec_setup(char *str)
                        __supported_pte_mask |= _PAGE_NX;
                        disable_nx = 0;
                }
-       } else {
-               if (!strcmp(str, "off")) {
-                       disable_nx = 1;
-                       __supported_pte_mask &= ~_PAGE_NX;
-               } else {
-                       return -EINVAL;
-               }
-       }
+       } else if (!strcmp(str, "off")) {
+               disable_nx = 1;
+               __supported_pte_mask &= ~_PAGE_NX;
+               exec_shield = 0;
+       } else
+               return -EINVAL;
 
        return 0;
 }
@@ -845,7 +843,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        set_nx();
        if (nx_enabled)
                printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+       else
 #endif
+       if (exec_shield)
+               printk(KERN_INFO "Using x86 segment limits to approximate "
+                       "NX protection\n");
 
        /* Enable PSE if available */
        if (cpu_has_pse)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 56fe712..ec932ae 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -111,13 +111,15 @@ static unsigned long mmap_legacy_base(void)
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-       if (mmap_is_legacy()) {
+       if (!(2 & exec_shield) && mmap_is_legacy()) {
                mm->mmap_base = mmap_legacy_base();
                mm->get_unmapped_area = arch_get_unmapped_area;
                mm->unmap_area = arch_unmap_area;
        } else {
                mm->mmap_base = mmap_base();
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+               if (!(current->personality & READ_IMPLIES_EXEC))
+                       mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
                mm->unmap_area = arch_unmap_area_topdown;
        }
 }
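
For clarity, the !(2 & exec_shield) test means bit 1 of the bitmask forces the top-down layout (and with it the exec area) even where the legacy layout would otherwise be picked. Trivial mock of the decision:

#include <stdio.h>

/* mirrors the condition in arch_pick_mmap_layout() above */
static const char *pick_layout(int exec_shield, int legacy)
{
        if (!(2 & exec_shield) && legacy)
                return "legacy bottom-up";
        return "top-down (+ exec area)";
}

int main(void)
{
        printf("exec-shield=1, legacy: %s\n", pick_layout(1, 1));
        printf("exec-shield=3, legacy: %s\n", pick_layout(3, 1));
        printf("exec-shield=1, modern: %s\n", pick_layout(1, 0));
        return 0;
}
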
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 513f330..2fb420a 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
        if (compat)
                addr = VDSO_HIGH_BASE;
        else {
-               addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+               addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
                if (IS_ERR_VALUE(addr)) {
                        ret = addr;
                        goto up_fail;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8fcfa39..5e1e2d8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = {
                .hasvdso        = 1
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+#define BAD_ADDR(x) IS_ERR_VALUE(x)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -723,6 +723,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
                        break;
                }
 
+       if (current->personality == PER_LINUX && (exec_shield & 2)) {
+               executable_stack = EXSTACK_DISABLE_X;
+               current->flags |= PF_RANDOMIZE;
+       }
+
        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
@@ -742,6 +747,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        if (retval)
                goto out_free_dentry;
 
+#ifdef CONFIG_X86_32
+       /*
+        * Turn off the CS limit completely if exec-shield disabled or
+        * NX active:
+        */
+       if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
+               arch_add_exec_range(current->mm, -1);
+#endif
+
        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;
@@ -749,7 +763,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
-       if (elf_read_implies_exec(loc->elf_ex, executable_stack))
+       if (!(exec_shield & 2) &&
+                       elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;
 
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
@@ -914,7 +929,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
-               if (!IS_ERR((void *)elf_entry)) {
+               if (!BAD_ADDR(elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
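
On the BAD_ADDR() change: IS_ERR_VALUE() treats only the top 4095 values -- the kernel's errno-encoded-as-pointer range -- as failures, instead of everything at or above TASK_SIZE. A sketch of the semantics (userspace, with the macro inlined):

#include <stdio.h>

#define MAX_ERRNO 4095
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
        unsigned long addr = 0xbf000000UL;       /* plausible high mapping */
        unsigned long err  = (unsigned long)-12; /* -ENOMEM as an address  */

        printf("%#lx bad=%d\n", addr, (int)IS_ERR_VALUE(addr)); /* bad=0 */
        printf("%#lx bad=%d\n", err,  (int)IS_ERR_VALUE(err));  /* bad=1 */
        return 0;
}
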
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6af7fba..bb98552 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -412,8 +412,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                unlock_task_sighand(task, &flags);
        }
 
-       if (!whole || num_threads < 2)
-               wchan = get_wchan(task);
+       if (!whole || num_threads < 2) {
+               wchan = 0;
+               if (current->uid == task->uid || current->euid == task->uid ||
+                       capable(CAP_SYS_NICE))
+                               wchan = get_wchan(task);
+       }
+
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffee2f7..c14817b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1105,7 +1105,13 @@ extern int install_special_mapping(struct mm_struct *mm,
                                   unsigned long addr, unsigned long len,
                                   unsigned long flags, struct page **pages);
 
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
+
+static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+       return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
+}
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fe82547..8007dbf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -177,6 +177,9 @@ struct mm_struct {
        unsigned long (*get_unmapped_area) (struct file *filp,
                                unsigned long addr, unsigned long len,
                                unsigned long pgoff, unsigned long flags);
+       unsigned long (*get_unmapped_exec_area) (struct file *filp,
+                               unsigned long addr, unsigned long len,
+                               unsigned long pgoff, unsigned long flags);
        void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
        unsigned long mmap_base;                /* base of mmap area */
        unsigned long task_size;                /* size of task vm space */
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e6..68c2549 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -55,8 +55,11 @@ struct rlimit {
 /*
  * Limit the stack by to some sane default: root can always
  * increase this limit if needed..  8MB seems reasonable.
+ *
+ * (2MB more to cover randomization effects.)
  */
-#define _STK_LIM       (8*1024*1024)
+#define _STK_LIM       (10*1024*1024)
+#define EXEC_STACK_BIAS        (2*1024*1024)
 
 /*
  * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
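
The arithmetic here pairs with over_stack_limit() further down in mm/mmap.c: the 2MB EXEC_STACK_BIAS is subtracted before the rlimit comparison, so stack randomization doesn't eat into the limit the user actually configured. Worked example (userspace, not kernel code):

#include <stdio.h>

#define EXEC_STACK_BIAS (2UL * 1024 * 1024)

/* same test as over_stack_limit() in mm/mmap.c below */
static int over_stack_limit(unsigned long sz, unsigned long rlim_cur)
{
        if (sz < EXEC_STACK_BIAS)
                return 0;
        return (sz - EXEC_STACK_BIAS) > rlim_cur;
}

int main(void)
{
        unsigned long rlim = 8UL * 1024 * 1024;  /* classic 8MB soft limit */

        /* 9MB: over the raw limit, but within limit + bias -> allowed */
        printf("9MB:  %d\n", over_stack_limit(9UL * 1024 * 1024, rlim));
        /* 11MB: over the limit even after the bias -> refused */
        printf("11MB: %d\n", over_stack_limit(11UL * 1024 * 1024, rlim));
        return 0;
}
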
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55e30d1..d400ab0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -97,6 +97,9 @@ struct futex_pi_state;
 struct robust_list_head;
 struct bio;
 
+extern int exec_shield;
+extern int print_fatal_signals;
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
@@ -345,6 +348,10 @@ extern int sysctl_max_map_count;
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
                       unsigned long, unsigned long);
+
+extern unsigned long
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
+                      unsigned long, unsigned long);
 extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
                          unsigned long len, unsigned long pgoff,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d56fe7..b512845 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -82,6 +82,26 @@ extern int percpu_pagelist_fraction;
 extern int compat_log;
 extern int latencytop_enabled;
 extern int sysctl_nr_open_min, sysctl_nr_open_max;
+
+int exec_shield = (1<<0);
+/* exec_shield is a bitmask:
+ * 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
+ * (1<<0) 1: on [also on if !=0]
+ * (1<<1) 2: force noexecstack regardless of PT_GNU_STACK
+ * The old settings
+ * (1<<2) 4: vdso just below .text of main (unless too low)
+ * (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
+ * are ignored because the vdso is placed completely randomly
+ */
+
+static int __init setup_exec_shield(char *str)
+{
+       get_option(&str, &exec_shield);
+
+       return 1;
+}
+__setup("exec-shield=", setup_exec_shield);
+
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
@@ -373,6 +393,14 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
        {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "exec-shield",
+               .data           = &exec_shield,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
                .ctl_name       = KERN_CORE_USES_PID,
                .procname       = "core_uses_pid",
                .data           = &core_uses_pid,
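
With the patch applied the bitmask is tweakable at runtime through the new sysctl; a quick userspace sketch for reading it back (assumes a patched kernel, obviously):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/exec-shield", "r");
        int val;

        if (!f || fscanf(f, "%d", &val) != 1) {
                fprintf(stderr, "exec-shield sysctl not available\n");
                return 1;
        }
        fclose(f);

        printf("exec-shield = %d%s%s\n", val,
               (val & 1) ? " [limits on]" : " [off]",
               (val & 2) ? " [forced noexecstack]" : "");
        return 0;
}
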
diff --git a/mm/mmap.c b/mm/mmap.c
index d4855a6..0850042 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -27,6 +27,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -43,6 +44,18 @@
 #define arch_rebalance_pgtables(addr, len)             (addr)
 #endif
 
+/* No sane architecture will #define these to anything else */
+#ifndef arch_add_exec_range
+#define arch_add_exec_range(mm, limit) do { ; } while (0)
+#endif
+#ifndef arch_flush_exec_range
+#define arch_flush_exec_range(mm)      do { ; } while (0)
+#endif
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
+#endif
+
+
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long start, unsigned long end);
@@ -391,6 +404,8 @@ static inline void
 __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent)
 {
+       if (vma->vm_flags & VM_EXEC)
+               arch_add_exec_range(mm, vma->vm_end);
        if (prev) {
                vma->vm_next = prev->vm_next;
                prev->vm_next = vma;
@@ -494,6 +509,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
        rb_erase(&vma->vm_rb, &mm->mm_rb);
        if (mm->mmap_cache == vma)
                mm->mmap_cache = prev;
+       if (vma->vm_flags & VM_EXEC)
+               arch_remove_exec_range(mm, vma->vm_end);
 }
 
 /*
@@ -800,6 +817,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                } else                                  /* cases 2, 5, 7 */
                        vma_adjust(prev, prev->vm_start,
                                end, prev->vm_pgoff, NULL);
+               if (prev->vm_flags & VM_EXEC)
+                       arch_add_exec_range(mm, prev->vm_end);
                return prev;
        }
 
@@ -955,7 +974,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
        /* Obtain the address to map to. we verify (or select) it and ensure
         * that it represents a valid section of the address space.
         */
-       addr = get_unmapped_area(file, addr, len, pgoff, flags);
+       addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
+               prot & PROT_EXEC);
        if (addr & ~PAGE_MASK)
                return addr;
 
@@ -1440,13 +1460,17 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
 }
 
 unsigned long
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
-               unsigned long pgoff, unsigned long flags)
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
+               unsigned long pgoff, unsigned long flags, int exec)
 {
        unsigned long (*get_area)(struct file *, unsigned long,
                                  unsigned long, unsigned long, unsigned long);
 
-       get_area = current->mm->get_unmapped_area;
+       if (exec && current->mm->get_unmapped_exec_area)
+               get_area = current->mm->get_unmapped_exec_area;
+       else
+               get_area = current->mm->get_unmapped_area;
+
        if (file && file->f_op && file->f_op->get_unmapped_area)
                get_area = file->f_op->get_unmapped_area;
        addr = get_area(file, addr, len, pgoff, flags);
@@ -1460,8 +1484,74 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
        return arch_rebalance_pgtables(addr, len);
 }
+EXPORT_SYMBOL(get_unmapped_area_prot);
+
+#define SHLIB_BASE     0x00110000
+
+unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
+               unsigned long len0, unsigned long pgoff, unsigned long flags)
+{
+       unsigned long addr = addr0, len = len0;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long tmp;
+
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       if (flags & MAP_FIXED)
+               return addr;
+
+       if (!addr) {
+               addr = randomize_range(SHLIB_BASE, 0x01000000, len);
+       } else {
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+                   (!vma || addr + len <= vma->vm_start))
+                       return addr;
+       }
+
+       addr = SHLIB_BASE;
+       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+               /* At this point:  (!vma || addr < vma->vm_end). */
+               if (TASK_SIZE - len < addr)
+                       return -ENOMEM;
+
+               if (!vma || addr + len <= vma->vm_start) {
+                       /*
+                        * Must not let a PROT_EXEC mapping get into the
+                        * brk area:
+                        */
+                       if (addr + len > mm->brk)
+                               goto failed;
+
+                       /*
+                        * Up until the brk area we randomize addresses
+                        * as much as possible:
+                        */
+                       if (addr >= 0x01000000) {
+                               tmp = randomize_range(0x01000000,
+                                       PAGE_ALIGN(max(mm->start_brk,
+                                       (unsigned long)0x08000000)), len);
+                               vma = find_vma(mm, tmp);
+                               if (TASK_SIZE - len >= tmp &&
+                                   (!vma || tmp + len <= vma->vm_start))
+                                       return tmp;
+                       }
+                       /*
+                        * Ok, randomization didn't work out - return
+                        * the result of the linear search:
+                        */
+                       return addr;
+               }
+               addr = vma->vm_end;
+       }
+
+failed:
+       return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
+}
 
-EXPORT_SYMBOL(get_unmapped_area);
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
@@ -1536,6 +1626,14 @@ out:
        return prev ? prev->vm_next : vma;
 }
 
+static int over_stack_limit(unsigned long sz)
+{
+       if (sz < EXEC_STACK_BIAS)
+               return 0;
+       return (sz - EXEC_STACK_BIAS) >
+                       current->signal->rlim[RLIMIT_STACK].rlim_cur;
+}
+
 /*
  * Verify that the stack growth is acceptable and
  * update accounting. This is shared with both the
@@ -1552,7 +1650,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
                return -ENOMEM;
 
        /* Stack limit test */
-       if (size > rlim[RLIMIT_STACK].rlim_cur)
+       if (over_stack_limit(size))
                return -ENOMEM;
 
        /* mlock limit tests */
@@ -1862,10 +1960,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
 
-       if (new_below)
+       if (new_below) {
+               unsigned long old_end = vma->vm_end;
+
                vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
                        ((addr - new->vm_start) >> PAGE_SHIFT), new);
-       else
+               if (vma->vm_flags & VM_EXEC)
+                       arch_remove_exec_range(mm, old_end);
+       } else
                vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
        return 0;
@@ -2109,6 +2211,7 @@ void exit_mmap(struct mm_struct *mm)
        vm_unacct_memory(nr_accounted);
        free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
        tlb_finish_mmu(tlb, 0, end);
+       arch_flush_exec_range(mm);
 
        /*
         * Walk the list again, actually closing and freeing it,
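
The placement policy in arch_get_unmapped_exec_area() deserves a note: first try a random slot in the low ASCII-armor region (addresses below 16MB contain a NUL byte, which frustrates string-based overflows), then walk upward linearly, and once past 16MB try a random slot between 16MB and roughly the brk start before settling for the linear result. A crude userspace sketch of that ordering, with rand() standing in for the kernel's randomize_range():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define SHLIB_BASE 0x00110000UL

/* crude stand-in for the kernel's randomize_range(); no page alignment */
static unsigned long randomize_range_demo(unsigned long start,
                                          unsigned long end,
                                          unsigned long len)
{
        if (end <= start + len)
                return 0;
        return start + (unsigned long)rand() % (end - start - len);
}

int main(void)
{
        unsigned long start_brk = 0x08050000UL;  /* imaginary brk start */
        unsigned long len = 0x00100000UL;        /* a 1MB mapping       */

        srand((unsigned int)time(NULL));

        /* attempt 1: the ASCII-armor zone below 16MB */
        printf("armor-zone candidate: %#lx\n",
               randomize_range_demo(SHLIB_BASE, 0x01000000UL, len));

        /* attempt 2: between 16MB and the brk area */
        printf("mid-range candidate:  %#lx\n",
               randomize_range_demo(0x01000000UL, start_brk, len));

        /* attempt 3 (not shown): take the linear-search result */
        return 0;
}
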
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fded06f..cdfcbd2 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -24,8 +24,15 @@
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
+#endif
 
 #ifndef pgprot_modify
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
@@ -140,7 +147,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        struct mm_struct *mm = vma->vm_mm;
        unsigned long oldflags = vma->vm_flags;
        long nrpages = (end - start) >> PAGE_SHIFT;
-       unsigned long charged = 0;
+       unsigned long charged = 0, old_end = vma->vm_end;
        pgoff_t pgoff;
        int error;
        int dirty_accountable = 0;
@@ -204,6 +211,9 @@ success:
                dirty_accountable = 1;
        }
 
+       if (oldflags & VM_EXEC)
+               arch_remove_exec_range(current->mm, old_end);
+
        mmu_notifier_invalidate_range_start(mm, start, end);
        if (is_vm_hugetlb_page(vma))
                hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
diff --git a/mm/mremap.c b/mm/mremap.c
index 58a2908..5bb50e6 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -400,8 +400,8 @@ unsigned long do_mremap(unsigned long addr,
                        if (vma->vm_flags & VM_MAYSHARE)
                                map_flags |= MAP_SHARED;
 
-                       new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
-                                               vma->vm_pgoff, map_flags);
+                       new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
+                               vma->vm_pgoff, map_flags, vma->vm_flags & VM_EXEC);
                        if (new_addr & ~PAGE_MASK) {
                                ret = new_addr;
                                goto out;
