On Wed, May 12, 2021 at 07:08:39PM -0500, Scott Cheloha wrote:
> I'm unsure which part of panic(9) is causing the problem he mentions,

I was talking about this:

r620-1# papnpaiancini:cc :p :op
opooolo_llc_ac_caccahhceh_ei_eti_tieetmme_mm__amgamigacigci__cc_hccehhcekcekc::
k :m  bmubmfubfuppflp llc pc pcuup  uf rfferree eel el iilsitss tm tom
omddoidfiiifeifeidde:d ::i ti etietmme m
a  daddardd rd0 r0
xx0fxfffffffffffffddf88d08c0cc0c6c76afc9b3f04500400++01+61 610 6x0
fx0fxffffffffffdffdf88d08
00020720d72a8c0049703eb!ef!e==!0=x009x59x95995b9ebbaee3ae3ae344ef54f5a4bff7db07990a9

It is serial console output caused by the vprintf(fmt, ap) a few
lines below: printf() is being called from multiple CPUs at once.

> If we set panicstr atomically only one CPU will write panicbuf.

We were lucky and the panic string contains only one of them.
Probably the final one.

ddb{4}> show panic
pool_cache_item_magic_check: mbufpl cpu free list modified: item addr
0xfffffd80ccca9300+16 0xfffffd80027dc47e!=0x959bea3e4ffab79a

Your diff guarantees that it contains the first panic, which is
likely the root of the problem.  Setting RB_NOSYNC reliably for all
other CPUs is also good.

OK bluhm@

> Index: kern/subr_prf.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_prf.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 subr_prf.c
> --- kern/subr_prf.c   28 Nov 2020 17:53:05 -0000      1.102
> +++ kern/subr_prf.c   13 May 2021 00:04:28 -0000
> @@ -97,7 +97,7 @@ struct mutex kprintf_mutex =
>   */
>  
>  extern       int log_open;   /* subr_log: is /dev/klog open? */
> -const        char *panicstr; /* arg to first call to panic (used as a flag
> +volatile const char *panicstr; /* arg to first call to panic (used as a flag
>                          to indicate that panic has already been called). */
>  const        char *faultstr; /* page fault string */
>  #ifdef DDB
> @@ -195,12 +195,10 @@ panic(const char *fmt, ...)
>  
>       bootopt = RB_AUTOBOOT | RB_DUMP;
>       va_start(ap, fmt);
> -     if (panicstr)
> +     if (atomic_cas_ptr(&panicstr, NULL, panicbuf) != NULL)
>               bootopt |= RB_NOSYNC;
> -     else {
> +     else
>               vsnprintf(panicbuf, sizeof panicbuf, fmt, ap);
> -             panicstr = panicbuf;
> -     }
>       va_end(ap);
>  
>       printf("panic: ");
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.153
> diff -u -p -r1.153 systm.h
> --- sys/systm.h       28 Apr 2021 09:42:04 -0000      1.153
> +++ sys/systm.h       13 May 2021 00:04:28 -0000
> @@ -71,7 +71,7 @@
>   * patched by a stalking hacker.
>   */
>  extern int securelevel;              /* system security level */
> -extern const char *panicstr; /* panic message */
> +extern volatile const char *panicstr;        /* panic message */
>  extern const char *faultstr; /* fault message */
>  extern const char version[];         /* system version */
>  extern const char copyright[];       /* system copyright */

Reply via email to