Author: dchagin
Date: Sun Jul 10 08:22:04 2016
New Revision: 302517
URL: https://svnweb.freebsd.org/changeset/base/302517

Log:
  Fix a copy/paste bug introduced during X86_64 Linuxulator work.
  FreeBSD supports the NX bit on X86_64 processors out of the box; for i386
  emulation use the READ_IMPLIES_EXEC flag, introduced in r302515.
  
  While here, move the common part of the mmap() and mprotect() code to files
  in compat/linux to reduce code duplication between Linuxulators.
  
  Reported by:    Johannes Jost Meixner, Shawn Webb
  
  MFC after:    1 week
  XMFC with:    r302515, r302516

Added:
  head/sys/compat/linux/linux_mmap.c   (contents, props changed)
  head/sys/compat/linux/linux_mmap.h   (contents, props changed)
Modified:
  head/sys/amd64/linux/linux.h
  head/sys/amd64/linux/linux_machdep.c
  head/sys/amd64/linux32/linux.h
  head/sys/amd64/linux32/linux32_machdep.c
  head/sys/i386/linux/linux.h
  head/sys/i386/linux/linux_machdep.c
  head/sys/modules/linux/Makefile
  head/sys/modules/linux_common/Makefile

Modified: head/sys/amd64/linux/linux.h
==============================================================================
--- head/sys/amd64/linux/linux.h        Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/amd64/linux/linux.h        Sun Jul 10 08:22:04 2016        (r302517)
@@ -139,13 +139,6 @@ struct l_rlimit {
        l_ulong         rlim_max;
 };
 
-/* mmap options */
-#define        LINUX_MAP_SHARED        0x0001
-#define        LINUX_MAP_PRIVATE       0x0002
-#define        LINUX_MAP_FIXED         0x0010
-#define        LINUX_MAP_ANON          0x0020
-#define        LINUX_MAP_GROWSDOWN     0x0100
-
 /*
  * stat family of syscalls
  */

Modified: head/sys/amd64/linux/linux_machdep.c
==============================================================================
--- head/sys/amd64/linux/linux_machdep.c        Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/amd64/linux/linux_machdep.c        Sun Jul 10 08:22:04 2016        (r302517)
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
 #include <compat/linux/linux_ipc.h>
 #include <compat/linux/linux_file.h>
 #include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_emul.h>
@@ -122,181 +123,19 @@ linux_set_upcall_kse(struct thread *td, 
        return (0);
 }
 
-#define STACK_SIZE  (2 * 1024 * 1024)
-#define GUARD_SIZE  (4 * PAGE_SIZE)
-
 int
 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
 {
-       struct proc *p = td->td_proc;
-       struct mmap_args /* {
-               caddr_t addr;
-               size_t len;
-               int prot;
-               int flags;
-               int fd;
-               long pad;
-               off_t pos;
-       } */ bsd_args;
-       int error;
-       struct file *fp;
-       cap_rights_t rights;
-
-       LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
-           args->addr, args->len, args->prot,
-           args->flags, args->fd, args->pgoff);
-
-       error = 0;
-       bsd_args.flags = 0;
-       fp = NULL;
-
-       /*
-        * Linux mmap(2):
-        * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
-        */
-       if (! ((args->flags & LINUX_MAP_SHARED) ^
-           (args->flags & LINUX_MAP_PRIVATE)))
-               return (EINVAL);
-
-       if (args->flags & LINUX_MAP_SHARED)
-               bsd_args.flags |= MAP_SHARED;
-       if (args->flags & LINUX_MAP_PRIVATE)
-               bsd_args.flags |= MAP_PRIVATE;
-       if (args->flags & LINUX_MAP_FIXED)
-               bsd_args.flags |= MAP_FIXED;
-       if (args->flags & LINUX_MAP_ANON)
-               bsd_args.flags |= MAP_ANON;
-       else
-               bsd_args.flags |= MAP_NOSYNC;
-       if (args->flags & LINUX_MAP_GROWSDOWN)
-               bsd_args.flags |= MAP_STACK;
-
-       /*
-        * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
-        * on Linux/i386. We do this to ensure maximum compatibility.
-        * Linux/ia64 does the same in i386 emulation mode.
-        */
-       bsd_args.prot = args->prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-
-       /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
-       bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : args->fd;
-       if (bsd_args.fd != -1) {
-               /*
-                * Linux follows Solaris mmap(2) description:
-                * The file descriptor fildes is opened with
-                * read permission, regardless of the
-                * protection options specified.
-                */
-
-               error = fget(td, bsd_args.fd,
-                   cap_rights_init(&rights, CAP_MMAP), &fp);
-               if (error != 0 )
-                       return (error);
-               if (fp->f_type != DTYPE_VNODE) {
-                       fdrop(fp, td);
-                       return (EINVAL);
-               }
-
-               /* Linux mmap() just fails for O_WRONLY files */
-               if (!(fp->f_flag & FREAD)) {
-                       fdrop(fp, td);
-                       return (EACCES);
-               }
-
-               fdrop(fp, td);
-       }
 
-       if (args->flags & LINUX_MAP_GROWSDOWN) {
-               /*
-                * The Linux MAP_GROWSDOWN option does not limit auto
-                * growth of the region.  Linux mmap with this option
-                * takes as addr the initial BOS, and as len, the initial
-                * region size.  It can then grow down from addr without
-                * limit.  However, Linux threads has an implicit internal
-                * limit to stack size of STACK_SIZE.  Its just not
-                * enforced explicitly in Linux.  But, here we impose
-                * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
-                * region, since we can do this with our mmap.
-                *
-                * Our mmap with MAP_STACK takes addr as the maximum
-                * downsize limit on BOS, and as len the max size of
-                * the region.  It then maps the top SGROWSIZ bytes,
-                * and auto grows the region down, up to the limit
-                * in addr.
-                *
-                * If we don't use the MAP_STACK option, the effect
-                * of this code is to allocate a stack region of a
-                * fixed size of (STACK_SIZE - GUARD_SIZE).
-                */
-
-               if ((caddr_t)PTRIN(args->addr) + args->len >
-                   p->p_vmspace->vm_maxsaddr) {
-                       /*
-                        * Some Linux apps will attempt to mmap
-                        * thread stacks near the top of their
-                        * address space.  If their TOS is greater
-                        * than vm_maxsaddr, vm_map_growstack()
-                        * will confuse the thread stack with the
-                        * process stack and deliver a SEGV if they
-                        * attempt to grow the thread stack past their
-                        * current stacksize rlimit.  To avoid this,
-                        * adjust vm_maxsaddr upwards to reflect
-                        * the current stacksize rlimit rather
-                        * than the maximum possible stacksize.
-                        * It would be better to adjust the
-                        * mmap'ed region, but some apps do not check
-                        * mmap's return value.
-                        */
-                       PROC_LOCK(p);
-                       p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
-                           lim_cur_proc(p, RLIMIT_STACK);
-                       PROC_UNLOCK(p);
-               }
-
-               /*
-                * This gives us our maximum stack size and a new BOS.
-                * If we're using VM_STACK, then mmap will just map
-                * the top SGROWSIZ bytes, and let the stack grow down
-                * to the limit at BOS.  If we're not using VM_STACK
-                * we map the full stack, since we don't have a way
-                * to autogrow it.
-                */
-               if (args->len > STACK_SIZE - GUARD_SIZE) {
-                       bsd_args.addr = (caddr_t)PTRIN(args->addr);
-                       bsd_args.len = args->len;
-               } else {
-                       bsd_args.addr = (caddr_t)PTRIN(args->addr) -
-                           (STACK_SIZE - GUARD_SIZE - args->len);
-                       bsd_args.len = STACK_SIZE - GUARD_SIZE;
-               }
-       } else {
-               bsd_args.addr = (caddr_t)PTRIN(args->addr);
-               bsd_args.len  = args->len;
-       }
-       bsd_args.pos = (off_t)args->pgoff;
-
-       error = sys_mmap(td, &bsd_args);
-
-       LINUX_CTR2(mmap2, "return: %d (%p)",
-           error, td->td_retval[0]);
-       return (error);
+       return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
+               args->flags, args->fd, args->pgoff));
 }
 
 int
 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
 {
-       struct mprotect_args bsd_args;
-
-       LINUX_CTR(mprotect);
 
-       bsd_args.addr = uap->addr;
-       bsd_args.len = uap->len;
-       bsd_args.prot = uap->prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-       return (sys_mprotect(td, &bsd_args));
+       return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
 }
 
 int

Modified: head/sys/amd64/linux32/linux.h
==============================================================================
--- head/sys/amd64/linux32/linux.h      Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/amd64/linux32/linux.h      Sun Jul 10 08:22:04 2016        (r302517)
@@ -165,13 +165,6 @@ struct l_rusage {
        l_long  ru_nivcsw;
 } __packed;
 
-/* mmap options */
-#define        LINUX_MAP_SHARED        0x0001
-#define        LINUX_MAP_PRIVATE       0x0002
-#define        LINUX_MAP_FIXED         0x0010
-#define        LINUX_MAP_ANON          0x0020
-#define        LINUX_MAP_GROWSDOWN     0x0100
-
 struct l_mmap_argv {
        l_uintptr_t     addr;
        l_size_t        len;

Modified: head/sys/amd64/linux32/linux32_machdep.c
==============================================================================
--- head/sys/amd64/linux32/linux32_machdep.c    Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/amd64/linux32/linux32_machdep.c    Sun Jul 10 08:22:04 2016        (r302517)
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include <amd64/linux32/linux32_proto.h>
 #include <compat/linux/linux_ipc.h>
 #include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_emul.h>
@@ -84,9 +85,6 @@ struct l_old_select_argv {
        l_uintptr_t     timeout;
 } __packed;
 
-static int     linux_mmap_common(struct thread *td, l_uintptr_t addr,
-                   l_size_t len, l_int prot, l_int flags, l_int fd,
-                   l_loff_t pos);
 
 static void
 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
@@ -448,9 +446,6 @@ linux_set_upcall_kse(struct thread *td, 
        return (0);
 }
 
-#define STACK_SIZE  (2 * 1024 * 1024)
-#define GUARD_SIZE  (4 * PAGE_SIZE)
-
 int
 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
 {
@@ -489,184 +484,11 @@ linux_mmap(struct thread *td, struct lin
            (uint32_t)linux_args.pgoff));
 }
 
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
-    l_int flags, l_int fd, l_loff_t pos)
-{
-       struct proc *p = td->td_proc;
-       struct mmap_args /* {
-               caddr_t addr;
-               size_t len;
-               int prot;
-               int flags;
-               int fd;
-               long pad;
-               off_t pos;
-       } */ bsd_args;
-       int error;
-       struct file *fp;
-       cap_rights_t rights;
-
-       error = 0;
-       bsd_args.flags = 0;
-       fp = NULL;
-
-       /*
-        * Linux mmap(2):
-        * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
-        */
-       if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
-               return (EINVAL);
-
-       if (flags & LINUX_MAP_SHARED)
-               bsd_args.flags |= MAP_SHARED;
-       if (flags & LINUX_MAP_PRIVATE)
-               bsd_args.flags |= MAP_PRIVATE;
-       if (flags & LINUX_MAP_FIXED)
-               bsd_args.flags |= MAP_FIXED;
-       if (flags & LINUX_MAP_ANON) {
-               /* Enforce pos to be on page boundary, then ignore. */
-               if ((pos & PAGE_MASK) != 0)
-                       return (EINVAL);
-               pos = 0;
-               bsd_args.flags |= MAP_ANON;
-       } else
-               bsd_args.flags |= MAP_NOSYNC;
-       if (flags & LINUX_MAP_GROWSDOWN)
-               bsd_args.flags |= MAP_STACK;
-
-       /*
-        * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
-        * on Linux/i386. We do this to ensure maximum compatibility.
-        * Linux/ia64 does the same in i386 emulation mode.
-        */
-       bsd_args.prot = prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-
-       /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
-       bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
-       if (bsd_args.fd != -1) {
-               /*
-                * Linux follows Solaris mmap(2) description:
-                * The file descriptor fildes is opened with
-                * read permission, regardless of the
-                * protection options specified.
-                */
-
-               error = fget(td, bsd_args.fd,
-                   cap_rights_init(&rights, CAP_MMAP), &fp);
-               if (error != 0)
-                       return (error);
-               if (fp->f_type != DTYPE_VNODE) {
-                       fdrop(fp, td);
-                       return (EINVAL);
-               }
-
-               /* Linux mmap() just fails for O_WRONLY files */
-               if (!(fp->f_flag & FREAD)) {
-                       fdrop(fp, td);
-                       return (EACCES);
-               }
-
-               fdrop(fp, td);
-       }
-
-       if (flags & LINUX_MAP_GROWSDOWN) {
-               /*
-                * The Linux MAP_GROWSDOWN option does not limit auto
-                * growth of the region.  Linux mmap with this option
-                * takes as addr the initial BOS, and as len, the initial
-                * region size.  It can then grow down from addr without
-                * limit.  However, Linux threads has an implicit internal
-                * limit to stack size of STACK_SIZE.  Its just not
-                * enforced explicitly in Linux.  But, here we impose
-                * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
-                * region, since we can do this with our mmap.
-                *
-                * Our mmap with MAP_STACK takes addr as the maximum
-                * downsize limit on BOS, and as len the max size of
-                * the region.  It then maps the top SGROWSIZ bytes,
-                * and auto grows the region down, up to the limit
-                * in addr.
-                *
-                * If we don't use the MAP_STACK option, the effect
-                * of this code is to allocate a stack region of a
-                * fixed size of (STACK_SIZE - GUARD_SIZE).
-                */
-
-               if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
-                       /*
-                        * Some Linux apps will attempt to mmap
-                        * thread stacks near the top of their
-                        * address space.  If their TOS is greater
-                        * than vm_maxsaddr, vm_map_growstack()
-                        * will confuse the thread stack with the
-                        * process stack and deliver a SEGV if they
-                        * attempt to grow the thread stack past their
-                        * current stacksize rlimit.  To avoid this,
-                        * adjust vm_maxsaddr upwards to reflect
-                        * the current stacksize rlimit rather
-                        * than the maximum possible stacksize.
-                        * It would be better to adjust the
-                        * mmap'ed region, but some apps do not check
-                        * mmap's return value.
-                        */
-                       PROC_LOCK(p);
-                       p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
-                           lim_cur_proc(p, RLIMIT_STACK);
-                       PROC_UNLOCK(p);
-               }
-
-               /*
-                * This gives us our maximum stack size and a new BOS.
-                * If we're using VM_STACK, then mmap will just map
-                * the top SGROWSIZ bytes, and let the stack grow down
-                * to the limit at BOS.  If we're not using VM_STACK
-                * we map the full stack, since we don't have a way
-                * to autogrow it.
-                */
-               if (len > STACK_SIZE - GUARD_SIZE) {
-                       bsd_args.addr = (caddr_t)PTRIN(addr);
-                       bsd_args.len = len;
-               } else {
-                       bsd_args.addr = (caddr_t)PTRIN(addr) -
-                           (STACK_SIZE - GUARD_SIZE - len);
-                       bsd_args.len = STACK_SIZE - GUARD_SIZE;
-               }
-       } else {
-               bsd_args.addr = (caddr_t)PTRIN(addr);
-               bsd_args.len  = len;
-       }
-       bsd_args.pos = pos;
-
-#ifdef DEBUG
-       if (ldebug(mmap))
-               printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
-                   __func__,
-                   (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
-                   bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
-       error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
-       if (ldebug(mmap))
-               printf("-> %s() return: 0x%x (0x%08x)\n",
-                       __func__, error, (u_int)td->td_retval[0]);
-#endif
-       return (error);
-}
-
 int
 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
 {
-       struct mprotect_args bsd_args;
 
-       bsd_args.addr = uap->addr;
-       bsd_args.len = uap->len;
-       bsd_args.prot = uap->prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-       return (sys_mprotect(td, &bsd_args));
+       return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
 }
 
 int

Added: head/sys/compat/linux/linux_mmap.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/compat/linux/linux_mmap.c  Sun Jul 10 08:22:04 2016        (r302517)
@@ -0,0 +1,257 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/capsicum.h>
+#include <sys/file.h>
+#include <sys/imgact.h>
+#include <sys/ktr.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_mmap.h>
+#include <compat/linux/linux_persona.h>
+#include <compat/linux/linux_util.h>
+
+
+#define STACK_SIZE  (2 * 1024 * 1024)
+#define GUARD_SIZE  (4 * PAGE_SIZE)
+
+#if defined(__amd64__)
+static void linux_fixup_prot(struct thread *td, int *prot);
+#endif
+
+
+int
+linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot,
+    int flags, int fd, off_t pos)
+{
+       struct proc *p = td->td_proc;
+       struct vmspace *vms = td->td_proc->p_vmspace;
+       struct mmap_args /* {
+               caddr_t addr;
+               size_t len;
+               int prot;
+               int flags;
+               int fd;
+               off_t pos;
+       } */ bsd_args;
+       int error;
+       struct file *fp;
+
+       cap_rights_t rights;
+       LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
+           addr, len, prot, flags, fd, pos);
+
+       error = 0;
+       bsd_args.flags = 0;
+       fp = NULL;
+
+       /*
+        * Linux mmap(2):
+        * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
+        */
+       if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
+               return (EINVAL);
+
+       if (flags & LINUX_MAP_SHARED)
+               bsd_args.flags |= MAP_SHARED;
+       if (flags & LINUX_MAP_PRIVATE)
+               bsd_args.flags |= MAP_PRIVATE;
+       if (flags & LINUX_MAP_FIXED)
+               bsd_args.flags |= MAP_FIXED;
+       if (flags & LINUX_MAP_ANON) {
+               /* Enforce pos to be on page boundary, then ignore. */
+               if ((pos & PAGE_MASK) != 0)
+                       return (EINVAL);
+               pos = 0;
+               bsd_args.flags |= MAP_ANON;
+       } else
+               bsd_args.flags |= MAP_NOSYNC;
+       if (flags & LINUX_MAP_GROWSDOWN)
+               bsd_args.flags |= MAP_STACK;
+
+       /*
+        * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
+        * on Linux/i386 if the binary requires executable stack.
+        * We do this only for IA32 emulation as on native i386 this is does not
+        * make sense without PAE.
+        *
+        * XXX. Linux checks that the file system is not mounted with noexec.
+        */
+       bsd_args.prot = prot;
+#if defined(__amd64__)
+       linux_fixup_prot(td, &bsd_args.prot);
+#endif
+
+       /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
+       bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
+       if (bsd_args.fd != -1) {
+               /*
+                * Linux follows Solaris mmap(2) description:
+                * The file descriptor fildes is opened with
+                * read permission, regardless of the
+                * protection options specified.
+                */
+
+               error = fget(td, bsd_args.fd,
+                   cap_rights_init(&rights, CAP_MMAP), &fp);
+               if (error != 0)
+                       return (error);
+               if (fp->f_type != DTYPE_VNODE) {
+                       fdrop(fp, td);
+                       return (EINVAL);
+               }
+
+               /* Linux mmap() just fails for O_WRONLY files */
+               if (!(fp->f_flag & FREAD)) {
+                       fdrop(fp, td);
+                       return (EACCES);
+               }
+
+               fdrop(fp, td);
+       }
+
+       if (flags & LINUX_MAP_GROWSDOWN) {
+               /*
+                * The Linux MAP_GROWSDOWN option does not limit auto
+                * growth of the region.  Linux mmap with this option
+                * takes as addr the initial BOS, and as len, the initial
+                * region size.  It can then grow down from addr without
+                * limit.  However, Linux threads has an implicit internal
+                * limit to stack size of STACK_SIZE.  Its just not
+                * enforced explicitly in Linux.  But, here we impose
+                * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
+                * region, since we can do this with our mmap.
+                *
+                * Our mmap with MAP_STACK takes addr as the maximum
+                * downsize limit on BOS, and as len the max size of
+                * the region.  It then maps the top SGROWSIZ bytes,
+                * and auto grows the region down, up to the limit
+                * in addr.
+                *
+                * If we don't use the MAP_STACK option, the effect
+                * of this code is to allocate a stack region of a
+                * fixed size of (STACK_SIZE - GUARD_SIZE).
+                */
+
+               if ((caddr_t)addr + len > vms->vm_maxsaddr) {
+                       /*
+                        * Some Linux apps will attempt to mmap
+                        * thread stacks near the top of their
+                        * address space.  If their TOS is greater
+                        * than vm_maxsaddr, vm_map_growstack()
+                        * will confuse the thread stack with the
+                        * process stack and deliver a SEGV if they
+                        * attempt to grow the thread stack past their
+                        * current stacksize rlimit.  To avoid this,
+                        * adjust vm_maxsaddr upwards to reflect
+                        * the current stacksize rlimit rather
+                        * than the maximum possible stacksize.
+                        * It would be better to adjust the
+                        * mmap'ed region, but some apps do not check
+                        * mmap's return value.
+                        */
+                       PROC_LOCK(p);
+                       vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack -
+                           lim_cur_proc(p, RLIMIT_STACK);
+                       PROC_UNLOCK(p);
+               }
+
+               /*
+                * This gives us our maximum stack size and a new BOS.
+                * If we're using VM_STACK, then mmap will just map
+                * the top SGROWSIZ bytes, and let the stack grow down
+                * to the limit at BOS.  If we're not using VM_STACK
+                * we map the full stack, since we don't have a way
+                * to autogrow it.
+                */
+               if (len > STACK_SIZE - GUARD_SIZE) {
+                       bsd_args.addr = (caddr_t)addr;
+                       bsd_args.len = len;
+               } else {
+                       bsd_args.addr = (caddr_t)addr -
+                           (STACK_SIZE - GUARD_SIZE - len);
+                       bsd_args.len = STACK_SIZE - GUARD_SIZE;
+               }
+       } else {
+               bsd_args.addr = (caddr_t)addr;
+               bsd_args.len  = len;
+       }
+       bsd_args.pos = pos;
+
+       error = sys_mmap(td, &bsd_args);
+
+       LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]);
+
+       return (error);
+}
+
+int
+linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot)
+{
+       struct mprotect_args bsd_args;
+
+       bsd_args.addr = (void *)addr;
+       bsd_args.len = len;
+       bsd_args.prot = prot;
+
+#if defined(__amd64__)
+       linux_fixup_prot(td, &bsd_args.prot);
+#endif
+       return (sys_mprotect(td, &bsd_args));
+}
+
+#if defined(__amd64__)
+static void
+linux_fixup_prot(struct thread *td, int *prot)
+{
+       struct linux_pemuldata *pem;
+
+       if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) {
+               pem = pem_find(td->td_proc);
+               if (pem->persona & LINUX_READ_IMPLIES_EXEC)
+                       *prot |= PROT_EXEC;
+       }
+
+}
+#endif

Added: head/sys/compat/linux/linux_mmap.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/compat/linux/linux_mmap.h  Sun Jul 10 08:22:04 2016        (r302517)
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_MMAP_H_
+#define        _LINUX_MMAP_H_
+
+/* mmap options */
+#define        LINUX_MAP_SHARED        0x0001
+#define        LINUX_MAP_PRIVATE       0x0002
+#define        LINUX_MAP_FIXED         0x0010
+#define        LINUX_MAP_ANON          0x0020
+#define        LINUX_MAP_GROWSDOWN     0x0100
+
+
+int linux_mmap_common(struct thread *, uintptr_t, size_t, int, int,
+                       int, off_t);
+int linux_mprotect_common(struct thread *, uintptr_t, size_t, int);
+
+#endif /* _LINUX_MMAP_H_ */

Modified: head/sys/i386/linux/linux.h
==============================================================================
--- head/sys/i386/linux/linux.h Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/i386/linux/linux.h Sun Jul 10 08:22:04 2016        (r302517)
@@ -140,13 +140,6 @@ struct l_rlimit {
        l_ulong rlim_max;
 };
 
-/* mmap options */
-#define        LINUX_MAP_SHARED        0x0001
-#define        LINUX_MAP_PRIVATE       0x0002
-#define        LINUX_MAP_FIXED         0x0010
-#define        LINUX_MAP_ANON          0x0020
-#define        LINUX_MAP_GROWSDOWN     0x0100
-
 struct l_mmap_argv {
        l_uintptr_t     addr;
        l_size_t        len;

Modified: head/sys/i386/linux/linux_machdep.c
==============================================================================
--- head/sys/i386/linux/linux_machdep.c Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/i386/linux/linux_machdep.c Sun Jul 10 08:22:04 2016        (r302517)
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
 #include <i386/linux/linux_proto.h>
 #include <compat/linux/linux_ipc.h>
 #include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_emul.h>
@@ -95,10 +96,6 @@ struct l_old_select_argv {
        struct l_timeval        *timeout;
 };
 
-static int     linux_mmap_common(struct thread *td, l_uintptr_t addr,
-                   l_size_t len, l_int prot, l_int flags, l_int fd,
-                   l_loff_t pos);
-
 
 int
 linux_execve(struct thread *td, struct linux_execve_args *args)
@@ -340,9 +337,6 @@ linux_set_upcall_kse(struct thread *td, 
        return (0);
 }
 
-#define STACK_SIZE  (2 * 1024 * 1024)
-#define GUARD_SIZE  (4 * PAGE_SIZE)
-
 int
 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
 {
@@ -381,187 +375,11 @@ linux_mmap(struct thread *td, struct lin
            (uint32_t)linux_args.pgoff));
 }
 
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
-    l_int flags, l_int fd, l_loff_t pos)
-{
-       struct proc *p = td->td_proc;
-       struct mmap_args /* {
-               caddr_t addr;
-               size_t len;
-               int prot;
-               int flags;
-               int fd;
-               long pad;
-               off_t pos;
-       } */ bsd_args;
-       int error;
-       struct file *fp;
-       cap_rights_t rights;
-
-       error = 0;
-       bsd_args.flags = 0;
-       fp = NULL;
-
-       /*
-        * Linux mmap(2):
-        * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
-        */
-       if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
-               return (EINVAL);
-
-       if (flags & LINUX_MAP_SHARED)
-               bsd_args.flags |= MAP_SHARED;
-       if (flags & LINUX_MAP_PRIVATE)
-               bsd_args.flags |= MAP_PRIVATE;
-       if (flags & LINUX_MAP_FIXED)
-               bsd_args.flags |= MAP_FIXED;
-       if (flags & LINUX_MAP_ANON) {
-               /* Enforce pos to be on page boundary, then ignore. */
-               if ((pos & PAGE_MASK) != 0)
-                       return (EINVAL);
-               pos = 0;
-               bsd_args.flags |= MAP_ANON;
-       } else
-               bsd_args.flags |= MAP_NOSYNC;
-       if (flags & LINUX_MAP_GROWSDOWN)
-               bsd_args.flags |= MAP_STACK;
-
-       /*
-        * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
-        * on Linux/i386. We do this to ensure maximum compatibility.
-        * Linux/ia64 does the same in i386 emulation mode.
-        */
-       bsd_args.prot = prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-
-       /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
-       bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
-       if (bsd_args.fd != -1) {
-               /*
-                * Linux follows Solaris mmap(2) description:
-                * The file descriptor fildes is opened with
-                * read permission, regardless of the
-                * protection options specified.
-                *
-                * Checking just CAP_MMAP is fine here, since the real work
-                * is done in the FreeBSD mmap().
-                */
-
-               error = fget(td, bsd_args.fd,
-                   cap_rights_init(&rights, CAP_MMAP), &fp);
-               if (error != 0)
-                       return (error);
-               if (fp->f_type != DTYPE_VNODE) {
-                       fdrop(fp, td);
-                       return (EINVAL);
-               }
-
-               /* Linux mmap() just fails for O_WRONLY files */
-               if (!(fp->f_flag & FREAD)) {
-                       fdrop(fp, td);
-                       return (EACCES);
-               }
-
-               fdrop(fp, td);
-       }
-
-       if (flags & LINUX_MAP_GROWSDOWN) {
-               /* 
-                * The Linux MAP_GROWSDOWN option does not limit auto
-                * growth of the region.  Linux mmap with this option
-                * takes as addr the inital BOS, and as len, the initial
-                * region size.  It can then grow down from addr without
-                * limit.  However, linux threads has an implicit internal
-                * limit to stack size of STACK_SIZE.  Its just not
-                * enforced explicitly in linux.  But, here we impose
-                * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
-                * region, since we can do this with our mmap.
-                *
-                * Our mmap with MAP_STACK takes addr as the maximum
-                * downsize limit on BOS, and as len the max size of
-                * the region.  It them maps the top SGROWSIZ bytes,
-                * and auto grows the region down, up to the limit
-                * in addr.
-                *
-                * If we don't use the MAP_STACK option, the effect
-                * of this code is to allocate a stack region of a
-                * fixed size of (STACK_SIZE - GUARD_SIZE).
-                */
-
-               if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
-                       /* 
-                        * Some linux apps will attempt to mmap
-                        * thread stacks near the top of their
-                        * address space.  If their TOS is greater
-                        * than vm_maxsaddr, vm_map_growstack()
-                        * will confuse the thread stack with the
-                        * process stack and deliver a SEGV if they
-                        * attempt to grow the thread stack past their
-                        * current stacksize rlimit.  To avoid this,
-                        * adjust vm_maxsaddr upwards to reflect
-                        * the current stacksize rlimit rather
-                        * than the maximum possible stacksize.
-                        * It would be better to adjust the
-                        * mmap'ed region, but some apps do not check
-                        * mmap's return value.
-                        */
-                       PROC_LOCK(p);
-                       p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
-                           lim_cur_proc(p, RLIMIT_STACK);
-                       PROC_UNLOCK(p);
-               }
-
-               /*
-                * This gives us our maximum stack size and a new BOS.
-                * If we're using VM_STACK, then mmap will just map
-                * the top SGROWSIZ bytes, and let the stack grow down
-                * to the limit at BOS.  If we're not using VM_STACK
-                * we map the full stack, since we don't have a way
-                * to autogrow it.
-                */
-               if (len > STACK_SIZE - GUARD_SIZE) {
-                       bsd_args.addr = (caddr_t)PTRIN(addr);
-                       bsd_args.len = len;
-               } else {
-                       bsd_args.addr = (caddr_t)PTRIN(addr) -
-                           (STACK_SIZE - GUARD_SIZE - len);
-                       bsd_args.len = STACK_SIZE - GUARD_SIZE;
-               }
-       } else {
-               bsd_args.addr = (caddr_t)PTRIN(addr);
-               bsd_args.len  = len;
-       }
-       bsd_args.pos = pos;
-
-#ifdef DEBUG
-       if (ldebug(mmap))
-               printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
-                   __func__,
-                   (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
-                   bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
-       error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
-       if (ldebug(mmap))
-               printf("-> %s() return: 0x%x (0x%08x)\n",
-                       __func__, error, (u_int)td->td_retval[0]);
-#endif
-       return (error);
-}
-
 int
 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
 {
-       struct mprotect_args bsd_args;
 
-       bsd_args.addr = uap->addr;
-       bsd_args.len = uap->len;
-       bsd_args.prot = uap->prot;
-       if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
-               bsd_args.prot |= PROT_READ | PROT_EXEC;
-       return (sys_mprotect(td, &bsd_args));
+       return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
 }
 
 int

Modified: head/sys/modules/linux/Makefile
==============================================================================
--- head/sys/modules/linux/Makefile     Sun Jul 10 08:17:16 2016        (r302516)
+++ head/sys/modules/linux/Makefile     Sun Jul 10 08:22:04 2016        (r302517)
@@ -30,7 +30,7 @@ SRCS+=        opt_apic.h
 OBJS=  ${VDSO}.so
 
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c \
+SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c linux_mmap.c \
        linux_emul.c opt_cpu.h linux.c

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to