Re: kernel crash in tcp_subr.c:2386

2024-02-12 Thread Cy Schubert
In message <20240212193044.e089d...@slippy.cwsent.com>, Cy Schubert writes:
> In message <625e0ea4-9413-45ad-b05c-500833a1d...@freebsd.org>, 
> tuexen@freebsd.org writes:
> > > On Feb 12, 2024, at 10:36, Alexander Leidinger wrote:
> > >
> > > Hi,
> > >
> > > I got a coredump with sources from 2024-02-10-144617 (GMT+0100):
> > Hi Alexander,
> >
> > we are aware of this problem, but haven't found a way to reproduce it.
> > Do you know how to reproduce this?
>
> I've reproduced this by rebooting any one of my machines in my basement. 
> The other machines will panic as below.
>
> I've reverted the three tcp timer commits, expecting one of them to be the 
> cause.

Another data point:

I build on a build machine and NFS mount /usr/obj on my other machines. 
Another symptom of this problem is that the NFS share will appear 
corrupted. And df -htnfs will sometimes not display the mounted NFS share. 
If not a kernel page fault, random kernel memory can be overwritten, 
resulting in bizarre behaviour prior to the panic.


-- 
Cheers,
Cy Schubert 
FreeBSD UNIX: Web:  https://FreeBSD.org
NTP:   Web:  https://nwtime.org

e^(i*pi)+1=0





Re: kernel crash in tcp_subr.c:2386

2024-02-12 Thread Cy Schubert
In message <625e0ea4-9413-45ad-b05c-500833a1d...@freebsd.org>, 
tuexen@freebsd.org writes:
> > On Feb 12, 2024, at 10:36, Alexander Leidinger wrote:
> >
> > Hi,
> >
> > I got a coredump with sources from 2024-02-10-144617 (GMT+0100):
> Hi Alexander,
>
> we are aware of this problem, but haven't found a way to reproduce it.
> Do you know how to reproduce this?

I've reproduced this by rebooting any one of my machines in my basement. 
The other machines will panic as below.

I've reverted the three tcp timer commits, expecting one of them to be the 
cause.

>
> Best regards
> Michael
> > ---snip---
> > __curthread () at /space/system/usr_src/sys/amd64/include/pcpu_aux.h:57
> > 57  __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
> > (kgdb) #0  __curthread () at /space/system/usr_src/sys/amd64/include/pcpu_aux.h:57
> >td = 
> > #1  doadump (textdump=textdump@entry=1)
> >at /space/system/usr_src/sys/kern/kern_shutdown.c:403
> >error = 0
> >coredump = 
> > #2  0x8052fe85 in kern_reboot (howto=260)
> >at /space/system/usr_src/sys/kern/kern_shutdown.c:521
> >once = 0
> >__pc = 
> > #3  0x80530382 in vpanic (
> >fmt=0x808df476 "Assertion %s failed at %s:%d",
> >ap=ap@entry=0xfe08a079ebf0)
> >at /space/system/usr_src/sys/kern/kern_shutdown.c:973
> >buf = "Assertion !callout_active(&tp->t_callout) failed at /space/system/usr_src/sys/netinet/tcp_subr.c:2386", '\000'  times>
> >__pc = 
> >__pc = 
> >__pc = 
> >other_cpus = {__bits = {14680063, 0 }}
> >td = 0xf8068ef99740
> >bootopt = 
> >newpanic = 
> > #4  0x805301d3 in panic (fmt=)
> >at /space/system/usr_src/sys/kern/kern_shutdown.c:889
> >ap = {{gp_offset = 32, fp_offset = 48,
> >overflow_arg_area = 0xfe08a079ec20,
> >reg_save_area = 0xfe08a079ebc0}}
> > #5  0x806c9d8c in tcp_discardcb (tp=tp@entry=0xf80af441ba80)
> >at /space/system/usr_src/sys/netinet/tcp_subr.c:2386
> >inp = 0xf80af441ba80
> >so = 0xf804d23d2780
> >m = 
> >isipv6 = 
> > #6  0x806d6291 in tcp_usr_detach (so=0xf804d23d2780)
> >at /space/system/usr_src/sys/netinet/tcp_usrreq.c:214
> >inp = 0xf80af441ba80
> >tp = 0xf80af441ba80
> > #7  0x805dba57 in sofree (so=0xf804d23d2780)
> >at /space/system/usr_src/sys/kern/uipc_socket.c:1205
> >pr = 0x80a8bd18 
> > #8  sorele_locked (so=so@entry=0xf804d23d2780)
> >at /space/system/usr_src/sys/kern/uipc_socket.c:1232
> > No locals.
> > #9  0x805dc8c0 in soclose (so=0xf804d23d2780)
> >at /space/system/usr_src/sys/kern/uipc_socket.c:1302
> >lqueue = {tqh_first = 0xf8068ef99740,
> >  tqh_last = 0xfe08a079ed40}
> >error = 0
> >saved_vnet = 0x0
> >last = 
> >listening = 
> > #10 0x804ccbd1 in fo_close (fp=0xf805f2dfc500, td=)
> >at /space/system/usr_src/sys/sys/file.h:390
> > No locals.
> > #11 _fdrop (fp=fp@entry=0xf805f2dfc500, td=,
> >td@entry=0xf8068ef99740)
> >at /space/system/usr_src/sys/kern/kern_descrip.c:3666
> >count = 
> >error = 
> > #12 0x804d02f3 in closef (fp=fp@entry=0xf805f2dfc500,
> >td=td@entry=0xf8068ef99740)
> >at /space/system/usr_src/sys/kern/kern_descrip.c:2839
> >_error = 0
> >_fp = 0xf805f2dfc500
> >lf = {l_start = -8791759350504, l_len = -8791759350528, l_pid = 0,
> >  l_type = 0, l_whence = 0, l_sysid = 0}
> >vp = 
> >fdtol = 
> >fdp = 
> > #13 0x804cd50c in closefp_impl (fdp=0xfe07afebf860, fd=19,
> >fp=0xf805f2dfc500, td=0xf8068ef99740, audit=<optimized out>)
> >at /space/system/usr_src/sys/kern/kern_descrip.c:1315
> >error = 
> > #14 closefp (fdp=0xfe07afebf860, fd=19, fp=0xf805f2dfc500,
> >td=0xf8068ef99740, holdleaders=true, audit=<optimized out>)
> >at /space/system/usr_src/sys/kern/kern_descrip.c:1372
> > No locals.
> > #15 0x808597d6 in syscallenter (td=0xf8068ef99740)
> >at /space/system/usr_src/sys/amd64/amd64/../../kern/subr_syscall.c:186
> >se = 0x80a48330 
> >p = 0xfe07f29995c0
> >sa = 0xf8068ef99b30
> >error = 
> >sy_thr_static = 
> >traced = 
> > #16 amd64_syscall (td=0xf8068ef99740, traced=0)
> >at /space/system/usr_src/sys/amd64/amd64/trap.c:1192
> >ksi = {ksi_link = {tqe_next = 0xfe08a079ef30,
> >tqe_prev = 0x808588af }, ksi_info = {
> >si_signo 

Re: kernel crash in tcp_subr.c:2386

2024-02-12 Thread tuexen
> On Feb 12, 2024, at 10:36, Alexander Leidinger  
> wrote:
> 
> Hi,
> 
> I got a coredump with sources from 2024-02-10-144617 (GMT+0100):
Hi Alexander,

we are aware of this problem, but haven't found a way to reproduce it.
Do you know how to reproduce this?

Best regards
Michael
> ---snip---
> __curthread () at /space/system/usr_src/sys/amd64/include/pcpu_aux.h:57
> 57  __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct 
> pcpu,
> (kgdb) #0  __curthread () at 
> /space/system/usr_src/sys/amd64/include/pcpu_aux.h:57
>td = 
> #1  doadump (textdump=textdump@entry=1)
>at /space/system/usr_src/sys/kern/kern_shutdown.c:403
>error = 0
>coredump = 
> #2  0x8052fe85 in kern_reboot (howto=260)
>at /space/system/usr_src/sys/kern/kern_shutdown.c:521
>once = 0
>__pc = 
> #3  0x80530382 in vpanic (
>fmt=0x808df476 "Assertion %s failed at %s:%d",
>ap=ap@entry=0xfe08a079ebf0)
>at /space/system/usr_src/sys/kern/kern_shutdown.c:973
>buf = "Assertion !callout_active(&tp->t_callout) failed at 
> /space/system/usr_src/sys/netinet/tcp_subr.c:2386", '\000' 
>__pc = 
>__pc = 
>__pc = 
>other_cpus = {__bits = {14680063, 0 }}
>td = 0xf8068ef99740
>bootopt = 
>newpanic = 
> #4  0x805301d3 in panic (fmt=)
>at /space/system/usr_src/sys/kern/kern_shutdown.c:889
>ap = {{gp_offset = 32, fp_offset = 48,
>overflow_arg_area = 0xfe08a079ec20,
>reg_save_area = 0xfe08a079ebc0}}
> #5  0x806c9d8c in tcp_discardcb (tp=tp@entry=0xf80af441ba80)
>at /space/system/usr_src/sys/netinet/tcp_subr.c:2386
>inp = 0xf80af441ba80
>so = 0xf804d23d2780
>m = 
>isipv6 = 
> #6  0x806d6291 in tcp_usr_detach (so=0xf804d23d2780)
>at /space/system/usr_src/sys/netinet/tcp_usrreq.c:214
>inp = 0xf80af441ba80
>tp = 0xf80af441ba80
> #7  0x805dba57 in sofree (so=0xf804d23d2780)
>at /space/system/usr_src/sys/kern/uipc_socket.c:1205
>pr = 0x80a8bd18 
> #8  sorele_locked (so=so@entry=0xf804d23d2780)
>at /space/system/usr_src/sys/kern/uipc_socket.c:1232
> No locals.
> #9  0x805dc8c0 in soclose (so=0xf804d23d2780)
>at /space/system/usr_src/sys/kern/uipc_socket.c:1302
>lqueue = {tqh_first = 0xf8068ef99740,
>  tqh_last = 0xfe08a079ed40}
>error = 0
>saved_vnet = 0x0
>last = 
>listening = 
> #10 0x804ccbd1 in fo_close (fp=0xf805f2dfc500, td=)
>at /space/system/usr_src/sys/sys/file.h:390
> No locals.
> #11 _fdrop (fp=fp@entry=0xf805f2dfc500, td=,
>td@entry=0xf8068ef99740)
>at /space/system/usr_src/sys/kern/kern_descrip.c:3666
>count = 
>error = 
> #12 0x804d02f3 in closef (fp=fp@entry=0xf805f2dfc500,
>td=td@entry=0xf8068ef99740)
>at /space/system/usr_src/sys/kern/kern_descrip.c:2839
>_error = 0
>_fp = 0xf805f2dfc500
>lf = {l_start = -8791759350504, l_len = -8791759350528, l_pid = 0,
>  l_type = 0, l_whence = 0, l_sysid = 0}
>vp = 
>fdtol = 
>fdp = 
> #13 0x804cd50c in closefp_impl (fdp=0xfe07afebf860, fd=19,
>fp=0xf805f2dfc500, td=0xf8068ef99740, audit=)
>at /space/system/usr_src/sys/kern/kern_descrip.c:1315
>error = 
> #14 closefp (fdp=0xfe07afebf860, fd=19, fp=0xf805f2dfc500,
>td=0xf8068ef99740, holdleaders=true, audit=)
>at /space/system/usr_src/sys/kern/kern_descrip.c:1372
> No locals.
> #15 0x808597d6 in syscallenter (td=0xf8068ef99740)
>at /space/system/usr_src/sys/amd64/amd64/../../kern/subr_syscall.c:186
>se = 0x80a48330 
>p = 0xfe07f29995c0
>sa = 0xf8068ef99b30
>error = 
>sy_thr_static = 
>traced = 
> #16 amd64_syscall (td=0xf8068ef99740, traced=0)
>at /space/system/usr_src/sys/amd64/amd64/trap.c:1192
>ksi = {ksi_link = {tqe_next = 0xfe08a079ef30,
>tqe_prev = 0x808588af }, ksi_info = {
>si_signo = 1, si_errno = 0, si_code = 2015268872, si_pid = -512,
>si_uid = 2398721856, si_status = -2042,
>si_addr = 0xfe08a079ef40, si_value = {sival_int = -1602621824,
>  sival_ptr = 0xfe08a079ee80, sigval_int = -1602621824,
>  sigval_ptr = 0xfe08a079ee80}, _reason = {_fault = {
>_trapno = 1489045984}, _timer = {_timerid = 1489045984,
>_overrun = 17999}, _mesgq = {_mqd = 1489045984}, _poll = {
>_band = 77306605406688}, _capsicum = {_syscall = 1489045984},
>  __spare__ = {__spare1__ = 77306605406688, __spare2__ = {
>  1489814048, 17999, 208, 0, 0, 0, 992191072,
>  ksi_flags = 975329968, ksi_sigq =