Author: markj
Date: Sat Jul  8 01:56:48 2017
New Revision: 320797
URL: https://svnweb.freebsd.org/changeset/base/320797

Log:
  MFC r311346, r311352, r313756:
  Add an allocator for KVA for execve arguments.

Modified:
  stable/11/sys/kern/kern_exec.c
  stable/11/sys/sys/imgact.h
  stable/11/sys/vm/vm_init.c
  stable/11/sys/vm/vm_kern.c
  stable/11/sys/vm/vm_kern.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/kern_exec.c
==============================================================================
--- stable/11/sys/kern/kern_exec.c      Fri Jul  7 22:00:39 2017        (r320796)
+++ stable/11/sys/kern/kern_exec.c      Sat Jul  8 01:56:48 2017        (r320797)
@@ -50,6 +50,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/imgact_elf.h>
 #include <sys/wait.h>
 #include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/pioctl.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
@@ -63,6 +68,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/shm.h>
+#include <sys/signalvar.h>
+#include <sys/smp.h>
+#include <sys/stat.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
@@ -1315,17 +1324,124 @@ err_exit:
        return (error);
 }
 
+struct exec_args_kva {
+       vm_offset_t addr;
+       u_int gen;
+       SLIST_ENTRY(exec_args_kva) next;
+};
+
+static DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva);
+
+static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
+static struct mtx exec_args_kva_mtx;
+static u_int exec_args_gen;
+
+static void
+exec_prealloc_args_kva(void *arg __unused)
+{
+       struct exec_args_kva *argkva;
+       u_int i;
+
+       SLIST_INIT(&exec_args_kva_freelist);
+       mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
+       for (i = 0; i < exec_map_entries; i++) {
+               argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
+               argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
+               argkva->gen = exec_args_gen;
+               SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+       }
+}
+SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
+
+static vm_offset_t
+exec_alloc_args_kva(void **cookie)
+{
+       struct exec_args_kva *argkva;
+
+       argkva = (void *)atomic_readandclear_ptr(
+           (uintptr_t *)DPCPU_PTR(exec_args_kva));
+       if (argkva == NULL) {
+               mtx_lock(&exec_args_kva_mtx);
+               while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL)
+                       (void)mtx_sleep(&exec_args_kva_freelist,
+                           &exec_args_kva_mtx, 0, "execkva", 0);
+               SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
+               mtx_unlock(&exec_args_kva_mtx);
+       }
+       *(struct exec_args_kva **)cookie = argkva;
+       return (argkva->addr);
+}
+
+static void
+exec_release_args_kva(struct exec_args_kva *argkva, u_int gen)
+{
+       vm_offset_t base;
+
+       base = argkva->addr;
+       if (argkva->gen != gen) {
+               vm_map_madvise(exec_map, base, base + exec_map_entry_size,
+                   MADV_FREE);
+               argkva->gen = gen;
+       }
+       if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
+           (uintptr_t)NULL, (uintptr_t)argkva)) {
+               mtx_lock(&exec_args_kva_mtx);
+               SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+               wakeup_one(&exec_args_kva_freelist);
+               mtx_unlock(&exec_args_kva_mtx);
+       }
+}
+
+static void
+exec_free_args_kva(void *cookie)
+{
+
+       exec_release_args_kva(cookie, exec_args_gen);
+}
+
+static void
+exec_args_kva_lowmem(void *arg __unused)
+{
+       SLIST_HEAD(, exec_args_kva) head;
+       struct exec_args_kva *argkva;
+       u_int gen;
+       int i;
+
+       gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1;
+
+       /*
+        * Force an madvise of each KVA range. Any currently allocated ranges
+        * will have MADV_FREE applied once they are freed.
+        */
+       SLIST_INIT(&head);
+       mtx_lock(&exec_args_kva_mtx);
+       SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva);
+       mtx_unlock(&exec_args_kva_mtx);
+       while ((argkva = SLIST_FIRST(&head)) != NULL) {
+               SLIST_REMOVE_HEAD(&head, next);
+               exec_release_args_kva(argkva, gen);
+       }
+
+       CPU_FOREACH(i) {
+               argkva = (void *)atomic_readandclear_ptr(
+                   (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva));
+               if (argkva != NULL)
+                       exec_release_args_kva(argkva, gen);
+       }
+}
+EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL,
+    EVENTHANDLER_PRI_ANY);
+
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
- * argument, and environment strings.  Returns zero if the allocation succeeds
- * and ENOMEM otherwise.
+ * argument, and environment strings.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
-       args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
-       return (args->buf != NULL ? 0 : ENOMEM);
+       args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
+       return (0);
 }
 
 void
@@ -1333,8 +1449,7 @@ exec_free_args(struct image_args *args)
 {
 
        if (args->buf != NULL) {
-               kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
-                   PATH_MAX + ARG_MAX);
+               exec_free_args_kva(args->bufkva);
                args->buf = NULL;
        }
        if (args->fname_buf != NULL) {

Modified: stable/11/sys/sys/imgact.h
==============================================================================
--- stable/11/sys/sys/imgact.h  Fri Jul  7 22:00:39 2017        (r320796)
+++ stable/11/sys/sys/imgact.h  Sat Jul  8 01:56:48 2017        (r320797)
@@ -42,6 +42,7 @@ struct ucred;
 
 struct image_args {
        char *buf;              /* pointer to string buffer */
+       void *bufkva;           /* cookie for string buffer KVA */
        char *begin_argv;       /* beginning of argv in buf */
        char *begin_envv;       /* beginning of envv in buf */
        char *endp;             /* current `end' pointer of arg & env strings */

Modified: stable/11/sys/vm/vm_init.c
==============================================================================
--- stable/11/sys/vm/vm_init.c  Fri Jul  7 22:00:39 2017        (r320796)
+++ stable/11/sys/vm/vm_init.c  Sat Jul  8 01:56:48 2017        (r320797)
@@ -91,10 +91,6 @@ __FBSDID("$FreeBSD$");
 
 long physmem;
 
-static int exec_map_entries = 16;
-SYSCTL_INT(_vm, OID_AUTO, exec_map_entries, CTLFLAG_RDTUN, &exec_map_entries, 0,
-    "Maximum number of simultaneous execs");
-
 /*
  * System initialization
  */
@@ -261,10 +257,19 @@ again:
                panic("Clean map calculation incorrect");
 
        /*
-        * Allocate the pageable submaps.
+        * Allocate the pageable submaps.  We may cache an exec map entry per
+        * CPU, so we therefore need to reserve space for at least ncpu+1
+        * entries to avoid deadlock.  The exec map is also used by some image
+        * activators, so we leave a fixed number of pages for their use.
         */
+#ifdef __LP64__
+       exec_map_entries = 8 * mp_ncpus;
+#else
+       exec_map_entries = 2 * mp_ncpus + 4;
+#endif
+       exec_map_entry_size = round_page(PATH_MAX + ARG_MAX);
        exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
-           exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE);
+           exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, FALSE);
        pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
            FALSE);
 }

Modified: stable/11/sys/vm/vm_kern.c
==============================================================================
--- stable/11/sys/vm/vm_kern.c  Fri Jul  7 22:00:39 2017        (r320796)
+++ stable/11/sys/vm/vm_kern.c  Sat Jul  8 01:56:48 2017        (r320797)
@@ -97,6 +97,9 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);
 /* NB: Used by kernel debuggers. */
 const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
 
+u_int exec_map_entry_size;
+u_int exec_map_entries;
+
 SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
     SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");
 

Modified: stable/11/sys/vm/vm_kern.h
==============================================================================
--- stable/11/sys/vm/vm_kern.h  Fri Jul  7 22:00:39 2017        (r320796)
+++ stable/11/sys/vm/vm_kern.h  Sat Jul  8 01:56:48 2017        (r320797)
@@ -61,7 +61,7 @@
  */
 
 #ifndef _VM_VM_KERN_H_
-#define _VM_VM_KERN_H_ 1
+#define        _VM_VM_KERN_H_
 
 /* Kernel memory management definitions. */
 extern vm_map_t kernel_map;
@@ -74,5 +74,7 @@ extern struct vmem *transient_arena;
 extern struct vmem *memguard_arena;
 extern vm_offset_t swapbkva;
 extern u_long vm_kmem_size;
+extern u_int exec_map_entries;
+extern u_int exec_map_entry_size;
 
-#endif                         /* _VM_VM_KERN_H_ */
+#endif /* _VM_VM_KERN_H_ */
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to