> Date: Sun, 03 Apr 2011 18:38:51 -0600
> From: Theo de Raadt <[email protected]>
>
> based on a conversation at the bar.
>
> POOL_DEBUG is expensive. But we really want it because it finds bugs
> before they hurt us. The solution to this is to make it simpler to
> turn off.
>
> This diff starts the kernel with pool debug on, but allows it to be
> turned off with sysctl kern.pool_debug=0. This does not gaurantee
> that all the pool pages will be unchecked, but it does help.
>
> This will let people who care about performance turn it off permanently
You mean people who care more about performance than the reliability
of their (future) kernels ;).
> in sysctl.conf; I think we will add a line there for people to know how
> to use it.
Anyway, the diff makes sense. At least we can tell the knob pushing
folks to turn this back on if they're experiencing random memory
corruption.
> Index: kern/subr_pool.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.100
> diff -u -r1.100 subr_pool.c
> --- kern/subr_pool.c 3 Apr 2011 22:07:37 -0000 1.100
> +++ kern/subr_pool.c 3 Apr 2011 22:59:39 -0000
> @@ -42,7 +42,7 @@
> #include <sys/sysctl.h>
>
> #include <uvm/uvm.h>
> -
> +#include <dev/rndvar.h>
>
> /*
> * Pool resource management utility.
> @@ -74,6 +74,7 @@
> caddr_t ph_page; /* this page's address */
> caddr_t ph_colored; /* page's colored address */
> int ph_pagesize;
> + int ph_magic;
> };
>
> struct pool_item {
> @@ -89,6 +90,7 @@
> #else
> #define PI_MAGIC 0xdeafbeef
> #endif
> +int pool_debug = 1;
>
> #define POOL_NEEDS_CATCHUP(pp)
> \
> ((pp)->pr_nitems < (pp)->pr_minitems)
> @@ -441,7 +443,8 @@
> else
> ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
> PR_NOWAIT);
> -
> + if (pool_debug)
> + ph->ph_magic = PI_MAGIC;
> return (ph);
> }
>
> @@ -611,13 +614,15 @@
> "page %p; item addr %p; offset 0x%x=0x%x",
> pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
> #ifdef POOL_DEBUG
> - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
> - i < pp->pr_size / sizeof(int); i++) {
> - if (ip[i] != PI_MAGIC) {
> - panic("pool_do_get(%s): free list modified: "
> - "page %p; item addr %p; offset 0x%x=0x%x",
> - pp->pr_wchan, ph->ph_page, pi,
> - i * sizeof(int), ip[i]);
> + if (pool_debug && ph->ph_magic) {
> + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
> + i < pp->pr_size / sizeof(int); i++) {
> + if (ip[i] != ph->ph_magic) {
> + panic("pool_do_get(%s): free list modified: "
> + "page %p; item addr %p; offset 0x%x=0x%x",
> + pp->pr_wchan, ph->ph_page, pi,
> + i * sizeof(int), ip[i]);
> + }
> }
> }
> #endif /* POOL_DEBUG */
> @@ -731,9 +736,11 @@
> #ifdef DIAGNOSTIC
> pi->pi_magic = PI_MAGIC;
> #ifdef POOL_DEBUG
> - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
> - i < pp->pr_size / sizeof(int); i++)
> - ip[i] = PI_MAGIC;
> + if (ph->ph_magic) {
> + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
> + i < pp->pr_size / sizeof(int); i++)
> + ip[i] = ph->ph_magic;
> + }
> #endif /* POOL_DEBUG */
> #endif /* DIAGNOSTIC */
>
> @@ -886,9 +893,11 @@
> #ifdef DIAGNOSTIC
> pi->pi_magic = PI_MAGIC;
> #ifdef POOL_DEBUG
> - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
> - i < pp->pr_size / sizeof(int); i++)
> - ip[i] = PI_MAGIC;
> + if (ph->ph_magic) {
> + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
> + i < pp->pr_size / sizeof(int); i++)
> + ip[i] = ph->ph_magic;
> + }
> #endif /* POOL_DEBUG */
> #endif /* DIAGNOSTIC */
> cp = (caddr_t)(cp + pp->pr_size);
> @@ -1273,14 +1282,16 @@
> 0, pi->pi_magic);
> }
> #ifdef POOL_DEBUG
> - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
> - i < pp->pr_size / sizeof(int); i++) {
> - if (ip[i] != PI_MAGIC) {
> - printf("pool(%s): free list modified: "
> - "page %p; item ordinal %d; addr %p "
> - "(p %p); offset 0x%x=0x%x\n",
> - pp->pr_wchan, ph->ph_page, n, pi,
> - page, i * sizeof(int), ip[i]);
> + if (pool_debug && ph->ph_magic) {
> + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
> + i < pp->pr_size / sizeof(int); i++) {
> + if (ip[i] != ph->ph_magic) {
> + printf("pool(%s): free list modified: "
> + "page %p; item ordinal %d; addr %p "
> + "(p %p); offset 0x%x=0x%x\n",
> + pp->pr_wchan, ph->ph_page, n, pi,
> + page, i * sizeof(int), ip[i]);
> + }
> }
> }
>
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.199
> diff -u -r1.199 kern_sysctl.c
> --- kern/kern_sysctl.c 2 Apr 2011 16:47:17 -0000 1.199
> +++ kern/kern_sysctl.c 3 Apr 2011 23:57:45 -0000
> @@ -270,6 +270,7 @@
> extern int cryptodevallowsoft;
> #endif
> extern int maxlocksperuid;
> + extern int pool_debug;
>
> /* all sysctl names at this level are terminal except a ton of them */
> if (namelen != 1) {
> @@ -591,6 +592,14 @@
> return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
> case KERN_NETLIVELOCKS:
> return (sysctl_rdint(oldp, oldlenp, newp, mcllivelocks));
> + case KERN_POOL_DEBUG:
> + error = sysctl_int(oldp, oldlenp, newp, newlen,
> + &pool_debug);
> + if (error == 0) {
> + pool_reclaim_all();
> + /* XXX How about a second reclaim in a few seconds? */
> + }
> + return (error);
> default:
> return (EOPNOTSUPP);
> }
> Index: sys/sysctl.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sysctl.h,v
> retrieving revision 1.109
> diff -u -r1.109 sysctl.h
> --- sys/sysctl.h 12 Mar 2011 04:54:28 -0000 1.109
> +++ sys/sysctl.h 3 Apr 2011 22:41:20 -0000
> @@ -188,7 +188,8 @@
> #define KERN_RTHREADS 74 /* kernel rthreads support
> enabled */
> #define KERN_CONSDEV 75 /* dev_t: console terminal
> device */
> #define KERN_NETLIVELOCKS 76 /* int: number of network
> livelocks */
> -#define KERN_MAXID 77 /* number of valid kern ids */
> +#define KERN_POOL_DEBUG 77 /* int: enable pool_debug */
> +#define KERN_MAXID 78 /* number of valid kern ids */
>
> #define CTL_KERN_NAMES { \
> { 0, 0 }, \
> @@ -268,6 +269,7 @@
> { "rthreads", CTLTYPE_INT }, \
> { "consdev", CTLTYPE_STRUCT }, \
> { "netlivelocks", CTLTYPE_INT }, \
> + { "pool_debug", CTLTYPE_INT }, \
> }
>
> /*