Module Name: src Committed By: chs Date: Mon Feb 29 00:34:17 UTC 2016
Modified Files: src/share/man/man9: kmem.9 vmem.9 src/sys/kern: subr_kmem.c subr_vmem.c Log Message: fix vmem_alloc() to never return an error for VM_SLEEP requests, thus fixing kmem_alloc() to never return NULL for KM_SLEEP requests. instead these operations will retry forever, which was the intent. To generate a diff of this commit: cvs rdiff -u -r1.19 -r1.20 src/share/man/man9/kmem.9 cvs rdiff -u -r1.15 -r1.16 src/share/man/man9/vmem.9 cvs rdiff -u -r1.61 -r1.62 src/sys/kern/subr_kmem.c cvs rdiff -u -r1.93 -r1.94 src/sys/kern/subr_vmem.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/share/man/man9/kmem.9 diff -u src/share/man/man9/kmem.9:1.19 src/share/man/man9/kmem.9:1.20 --- src/share/man/man9/kmem.9:1.19 Fri Dec 11 10:05:17 2015 +++ src/share/man/man9/kmem.9 Mon Feb 29 00:34:17 2016 @@ -1,4 +1,4 @@ -.\" $NetBSD: kmem.9,v 1.19 2015/12/11 10:05:17 wiz Exp $ +.\" $NetBSD: kmem.9,v 1.20 2016/02/29 00:34:17 chs Exp $ .\" .\" Copyright (c)2006 YAMAMOTO Takashi, .\" All rights reserved. @@ -25,7 +25,7 @@ .\" SUCH DAMAGE. .\" .\" ------------------------------------------------------------ -.Dd December 10, 2015 +.Dd February 28, 2016 .Dt KMEM 9 .Os .\" ------------------------------------------------------------ @@ -77,15 +77,9 @@ Either of the following: .It Dv KM_SLEEP If the allocation cannot be satisfied immediately, sleep until enough memory is available. -Note that this does not mean that if +If .Dv KM_SLEEP is specified, then the allocation cannot fail. -Under resource stress conditions, the allocation can fail and the -function will return -.Dv NULL . -One such scenario is when the allocation size is larger than it can ever -be allocated; another is when the system memory resources are exhausted -to even allocate pools of pages. .It Dv KM_NOSLEEP Don't sleep. Immediately return @@ -143,9 +137,6 @@ It must be the one returned by .Fn kmem_alloc or .Fn kmem_zalloc . -One such scenario is when the allocation size is larger than it can ever -be allocated; another is when the system memory resources are exhausted -to even allocate pools of pages. .It Fa size The size of the memory being freed, in bytes. It must be the same as the @@ -185,10 +176,6 @@ It should be noted that .Fn kmem_free may also block. .Pp -Always check the return value of the allocators, even when -.Dv KM_SLEEP -is specified to avoid kernel crashes during resource stress conditions. -.Pp For some locks this is permissible or even unavoidable. For others, particularly locks that may be taken from soft interrupt context, it is a serious problem. Index: src/share/man/man9/vmem.9 diff -u src/share/man/man9/vmem.9:1.15 src/share/man/man9/vmem.9:1.16 --- src/share/man/man9/vmem.9:1.15 Tue Jan 29 22:02:17 2013 +++ src/share/man/man9/vmem.9 Mon Feb 29 00:34:17 2016 @@ -1,4 +1,4 @@ -.\" $NetBSD: vmem.9,v 1.15 2013/01/29 22:02:17 wiz Exp $ +.\" $NetBSD: vmem.9,v 1.16 2016/02/29 00:34:17 chs Exp $ .\" .\" Copyright (c)2006 YAMAMOTO Takashi, .\" All rights reserved. @@ -25,7 +25,7 @@ .\" SUCH DAMAGE. .\" .\" ------------------------------------------------------------ -.Dd January 29, 2013 +.Dd February 28, 2016 .Dt VMEM 9 .Os .\" ------------------------------------------------------------ @@ -83,7 +83,7 @@ other than virtual memory. .Fn vmem_create creates a new vmem arena. .Pp -.Bl -tag -width qcache_max +.Bl -tag -offset indent -width qcache_max .It Fa name The string to describe the vmem. .It Fa base @@ -118,7 +118,7 @@ calls to import a span of size at least .Fa size . .Fa allocfn -should accept the same +must accept the same .Fa flags as .Fn vmem_alloc . @@ -169,7 +169,8 @@ It is merely a hint and can be ignored. Either of: .Bl -tag -width VM_NOSLEEP .It Dv VM_SLEEP -Can sleep until enough resources are available. +If the allocation cannot be satisfied immediately, sleep until enough +resources are available. .It Dv VM_NOSLEEP Don't sleep. Immediately return @@ -184,7 +185,7 @@ Interrupt level to be blocked for alloca .Fn vmem_xcreate creates a new vmem arena. .Pp -.Bl -tag -width qcache_max +.Bl -tag -offset indent -width qcache_max .It Fa name The string to describe the vmem. .It Fa base @@ -220,7 +221,7 @@ calls to import a span of size at least .Fa size . .Fa allocfn -should accept the same +must accept the same .Fa flags as .Fn vmem_alloc . @@ -274,7 +275,8 @@ It is merely a hint and can be ignored. Either of: .Bl -tag -width VM_NOSLEEP .It Dv VM_SLEEP -Can sleep until enough resources are available. +If the allocation cannot be satisfied immediately, sleep until enough +resources are available. .It Dv VM_NOSLEEP Don't sleep. Immediately return @@ -297,23 +299,26 @@ Returns on success, .Dv ENOMEM on failure. -.Fa flags -should be one of: +.Bl -tag -offset indent -width flags +.It Fa flags +Either of: .Bl -tag -width VM_NOSLEEP .It Dv VM_SLEEP -Can sleep until enough resources are available. +If the allocation cannot be satisfied immediately, sleep until enough +resources are available. .It Dv VM_NOSLEEP Don't sleep. Immediately return .Dv ENOMEM if there are not enough resources available. .El +.El .Pp .\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Fn vmem_xalloc allocates a resource from the arena. .Pp -.Bl -tag -width nocross +.Bl -tag -offset indent -width nocross .It Fa vm The arena which we allocate from. .It Fa size @@ -333,10 +338,10 @@ If .Fa align is zero, .Fa phase -should be zero. +must be zero. Otherwise, .Fa phase -should be smaller than +must be smaller than .Fa align . .It Fa nocross Request a resource which doesn't cross @@ -353,7 +358,7 @@ if the caller does not care. .It Fa flags A bitwise OR of an allocation strategy and a sleep flag. .Pp -The allocation strategy is one of: +The allocation strategy must be one of: .Bl -tag -width VM_INSTANTFIT .It Dv VM_BESTFIT Prefer space efficiency. @@ -361,10 +366,11 @@ Prefer space efficiency. Prefer performance. .El .Pp -The sleep flag should be one of: +The sleep flag must be one of: .Bl -tag -width VM_NOSLEEP .It Dv VM_SLEEP -Can sleep until enough resources are available. +If the allocation cannot be satisfied immediately, sleep until enough +resources are available. .It Dv VM_NOSLEEP Don't sleep. Immediately return @@ -386,14 +392,14 @@ frees resource allocated by .Fn vmem_xalloc to the arena. .Pp -.Bl -tag -width addr +.Bl -tag -offset indent -width addr .It Fa vm The arena which we free to. .It Fa addr The resource being freed. -It must be the one returned by +It must have been allocated via .Fn vmem_xalloc . -Notably, it must not be the one from +Notably, it must not have been allocated via .Fn vmem_alloc . Otherwise, the behaviour is undefined. .It Fa size @@ -408,7 +414,7 @@ argument used for .Fn vmem_alloc allocates a resource from the arena. .Pp -.Bl -tag -width flags +.Bl -tag -offset indent -width flags .It Fa vm The arena which we allocate from. .It Fa size @@ -416,7 +422,7 @@ Specify the size of the allocation. .It Fa flags A bitwise OR of an allocation strategy and a sleep flag. .Pp -The allocation strategy is one of: +The allocation strategy must be one of: .Bl -tag -width VM_INSTANTFIT .It Dv VM_BESTFIT Prefer space efficiency. @@ -424,10 +430,11 @@ Prefer space efficiency. Prefer performance. .El .Pp -The sleep flag should be one of: +The sleep flag must be one of: .Bl -tag -width VM_NOSLEEP .It Dv VM_SLEEP -Can sleep until enough resources are available. +If the allocation cannot be satisfied immediately, sleep until enough +resources are available. .It Dv VM_NOSLEEP Don't sleep. Immediately return @@ -449,14 +456,14 @@ frees resource allocated by .Fn vmem_alloc to the arena. .Pp -.Bl -tag -width addr +.Bl -tag -offset indent -width addr .It Fa vm The arena which we free to. .It Fa addr The resource being freed. -It must be the one returned by +It must have been allocated via .Fn vmem_alloc . -Notably, it must not be the one from +Notably, it must not have been allocated via .Fn vmem_xalloc . Otherwise, the behaviour is undefined. .It Fa size @@ -471,10 +478,10 @@ argument used for .Fn vmem_destroy destroys a vmem arena. .Pp -.Bl -tag -width vm +.Bl -tag -offset indent -width vm .It Fa vm The vmem arena being destroyed. -The caller should ensure that no one will use it anymore. +The caller must ensure that no one will use it anymore. .El .\" ------------------------------------------------------------ .Sh RETURN VALUES Index: src/sys/kern/subr_kmem.c diff -u src/sys/kern/subr_kmem.c:1.61 src/sys/kern/subr_kmem.c:1.62 --- src/sys/kern/subr_kmem.c:1.61 Mon Jul 27 09:24:28 2015 +++ src/sys/kern/subr_kmem.c Mon Feb 29 00:34:17 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: subr_kmem.c,v 1.61 2015/07/27 09:24:28 maxv Exp $ */ +/* $NetBSD: subr_kmem.c,v 1.62 2016/02/29 00:34:17 chs Exp $ */ /*- * Copyright (c) 2009-2015 The NetBSD Foundation, Inc. @@ -100,7 +100,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.61 2015/07/27 09:24:28 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.62 2016/02/29 00:34:17 chs Exp $"); #include <sys/param.h> #include <sys/callback.h> @@ -383,9 +383,13 @@ kmem_intr_free(void *p, size_t requested void * kmem_alloc(size_t size, km_flag_t kmflags) { + void *v; + KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()), "kmem(9) should not be used from the interrupt context"); - return kmem_intr_alloc(size, kmflags); + v = kmem_intr_alloc(size, kmflags); + KASSERT(v || (kmflags & KM_NOSLEEP) != 0); + return v; } /* @@ -396,9 +400,13 @@ kmem_alloc(size_t size, km_flag_t kmflag void * kmem_zalloc(size_t size, km_flag_t kmflags) { + void *v; + KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()), "kmem(9) should not be used from the interrupt context"); - return kmem_intr_zalloc(size, kmflags); + v = kmem_intr_zalloc(size, kmflags); + KASSERT(v || (kmflags & KM_NOSLEEP) != 0); + return v; } /* Index: src/sys/kern/subr_vmem.c diff -u src/sys/kern/subr_vmem.c:1.93 src/sys/kern/subr_vmem.c:1.94 --- src/sys/kern/subr_vmem.c:1.93 Mon Aug 24 22:50:32 2015 +++ src/sys/kern/subr_vmem.c Mon Feb 29 00:34:17 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: subr_vmem.c,v 1.93 2015/08/24 22:50:32 pooka Exp $ */ +/* $NetBSD: subr_vmem.c,v 1.94 2016/02/29 00:34:17 chs Exp $ */ /*- * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, @@ -46,7 +46,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.93 2015/08/24 22:50:32 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.94 2016/02/29 00:34:17 chs Exp $"); #if defined(_KERNEL) && defined(_KERNEL_OPT) #include "opt_ddb.h" @@ -194,9 +194,19 @@ static LIST_HEAD(, vmem_btag) vmem_btag_ static size_t vmem_btag_freelist_count = 0; static struct pool vmem_btag_pool; +static void +vmem_kick_pdaemon(void) +{ +#if defined(_KERNEL) + mutex_spin_enter(&uvm_fpageqlock); + uvm_kick_pdaemon(); + mutex_spin_exit(&uvm_fpageqlock); +#endif +} + /* ---- boundary tag */ -static int bt_refill(vmem_t *vm, vm_flag_t flags); +static int bt_refill(vmem_t *vm); static void * pool_page_alloc_vmem_meta(struct pool *pp, int flags) @@ -226,12 +236,10 @@ struct pool_allocator pool_allocator_vme }; static int -bt_refill(vmem_t *vm, vm_flag_t flags) +bt_refill(vmem_t *vm) { bt_t *bt; - KASSERT(flags & VM_NOSLEEP); - VMEM_LOCK(vm); if (vm->vm_nfreetags > BT_MINRESERVE) { VMEM_UNLOCK(vm); @@ -270,12 +278,9 @@ bt_refill(vmem_t *vm, vm_flag_t flags) VMEM_UNLOCK(vm); if (kmem_meta_arena != NULL) { - bt_refill(kmem_arena, (flags & ~VM_FITMASK) - | VM_INSTANTFIT | VM_POPULATING); - bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) - | VM_INSTANTFIT | VM_POPULATING); - bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) - | VM_INSTANTFIT | VM_POPULATING); + (void)bt_refill(kmem_arena); + (void)bt_refill(kmem_va_meta_arena); + (void)bt_refill(kmem_meta_arena); } return 0; @@ -288,8 +293,22 @@ bt_alloc(vmem_t *vm, vm_flag_t flags) VMEM_LOCK(vm); while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) { VMEM_UNLOCK(vm); - if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) { - return NULL; + if (bt_refill(vm)) { + if ((flags & VM_NOSLEEP) != 0) { + return NULL; + } + + /* + * It would be nice to wait for something specific here + * but there are multiple ways that a retry could + * succeed and we can't wait for multiple things + * simultaneously. So we'll just sleep for an arbitrary + * short period of time and retry regardless. + * This should be a very rare case. + */ + + vmem_kick_pdaemon(); + kpause("btalloc", false, 1, NULL); } VMEM_LOCK(vm); } @@ -940,7 +959,7 @@ vmem_init(vmem_t *vm, const char *name, #if defined(_KERNEL) if (flags & VM_BOOTSTRAP) { - bt_refill(vm, VM_NOSLEEP); + bt_refill(vm); } mutex_enter(&vmem_list_lock); @@ -1177,11 +1196,7 @@ retry: /* XXX */ if ((flags & VM_SLEEP) != 0) { -#if defined(_KERNEL) - mutex_spin_enter(&uvm_fpageqlock); - uvm_kick_pdaemon(); - mutex_spin_exit(&uvm_fpageqlock); -#endif + vmem_kick_pdaemon(); VMEM_LOCK(vm); VMEM_CONDVAR_WAIT(vm); VMEM_UNLOCK(vm);