Re: splassert: ip_output: want 1 have 0
> Date: Thu, 22 Dec 2016 14:56:43 +0100 > From: Martin Pieuchot > > On 22/12/16(Thu) 10:45, Martin Pieuchot wrote: > > On 22/12/16(Thu) 00:32, Mark Kettenis wrote: > > > splassert: ip_output: want 1 have 0 > > > Starting stack trace... > > > ip_output() at ip_output+0x7d > > > ipsp_process_done() at ipsp_process_done+0x2ad > > > esp_output_cb() at esp_output_cb+0x135 > > > taskq_thread() at taskq_thread+0x6c > > > end trace frame: 0x0, count: 253 > > > End of stack trace. > > > > > > This makes no sense to me since esp_output_cb() calls > > > ipsp_process_done() while at splsoftnet. What am I missing? > > > > It's a missing NET_LOCK(), right now we're abusing splassert() to find > > code paths where the lock is not held and should be. > > This should fix it, do you confirm? Looked closer at the diff now, and it looks correct to me. > Index: netinet/ip_ah.c > === > RCS file: /cvs/src/sys/netinet/ip_ah.c,v > retrieving revision 1.123 > diff -u -p -r1.123 ip_ah.c > --- netinet/ip_ah.c 19 Sep 2016 18:09:22 - 1.123 > +++ netinet/ip_ah.c 22 Dec 2016 13:55:02 - > @@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp) > return (EINVAL); > } > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp) > crypto_freereq(crp); > > err = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return err; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > > Index: netinet/ip_esp.c > === > RCS file: /cvs/src/sys/netinet/ip_esp.c,v > retrieving revision 1.141 > diff -u -p -r1.141 ip_esp.c > --- netinet/ip_esp.c 19 Sep 2016 18:09:22 - 1.141 > +++ netinet/ip_esp.c 22 Dec 2016 13:55:30 - > @@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp) > } > > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp) > > /* Call the IPsec input callback. */ > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > > Index: netinet/ip_ipcomp.c > === > RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v > retrieving revision 1.48 > diff -u -p -r1.48 ip_ipcomp.c > --- netinet/ip_ipcomp.c 24 Sep 2016 14:51:37 - 1.48 > +++ netinet/ip_ipcomp.c 22 Dec 2016 13:54:18 - > @@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp) > return (EINVAL); > } > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp) > /* Compression was useless, we have lost time. */ > crypto_freereq(crp); > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > } > > @@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp) > crypto_freereq(crp); > > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > >
Re: splassert: ip_output: want 1 have 0
> Date: Thu, 22 Dec 2016 14:56:43 +0100 > From: Martin Pieuchot > > On 22/12/16(Thu) 10:45, Martin Pieuchot wrote: > > On 22/12/16(Thu) 00:32, Mark Kettenis wrote: > > > splassert: ip_output: want 1 have 0 > > > Starting stack trace... > > > ip_output() at ip_output+0x7d > > > ipsp_process_done() at ipsp_process_done+0x2ad > > > esp_output_cb() at esp_output_cb+0x135 > > > taskq_thread() at taskq_thread+0x6c > > > end trace frame: 0x0, count: 253 > > > End of stack trace. > > > > > > This makes no sense to me since esp_output_cb() calls > > > ipsp_process_done() while at splsoftnet. What am I missing? > > > > It's a missing NET_LOCK(), right now we're abusing splassert() to find > > code paths where the lock is not held and should be. > > This should fix it, do you confirm? Yes, this fixes the problem. Will run with it for a bit and take a closer look tomorrow. > Index: netinet/ip_ah.c > === > RCS file: /cvs/src/sys/netinet/ip_ah.c,v > retrieving revision 1.123 > diff -u -p -r1.123 ip_ah.c > --- netinet/ip_ah.c 19 Sep 2016 18:09:22 - 1.123 > +++ netinet/ip_ah.c 22 Dec 2016 13:55:02 - > @@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp) > return (EINVAL); > } > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp) > crypto_freereq(crp); > > err = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return err; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > > Index: netinet/ip_esp.c > === > RCS file: /cvs/src/sys/netinet/ip_esp.c,v > retrieving revision 1.141 > diff -u -p -r1.141 ip_esp.c > --- netinet/ip_esp.c 19 Sep 2016 18:09:22 - 1.141 > +++ netinet/ip_esp.c 22 Dec 2016 13:55:30 - > @@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp) > } > > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp) > > /* Call the IPsec input callback. */ > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > > Index: netinet/ip_ipcomp.c > === > RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v > retrieving revision 1.48 > diff -u -p -r1.48 ip_ipcomp.c > --- netinet/ip_ipcomp.c 24 Sep 2016 14:51:37 - 1.48 > +++ netinet/ip_ipcomp.c 22 Dec 2016 13:54:18 - > @@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp) > return (EINVAL); > } > > - s = splsoftnet(); > + NET_LOCK(s); > > tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); > if (tdb == NULL) { > @@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp) > /* Reset the session ID */ > if (tdb->tdb_cryptoid != 0) > tdb->tdb_cryptoid = crp->crp_sid; > - splx(s); > + NET_UNLOCK(s); > return crypto_dispatch(crp); > } > free(tc, M_XDATA, 0); > @@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp) > /* Compression was useless, we have lost time. */ > crypto_freereq(crp); > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > } > > @@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp) > crypto_freereq(crp); > > error = ipsp_process_done(m, tdb); > - splx(s); > + NET_UNLOCK(s); > return error; > > baddone: > - splx(s); > + NET_UNLOCK(s); > > m_freem(m); > >
Re: splassert: ip_output: want 1 have 0
On 22/12/16(Thu) 10:45, Martin Pieuchot wrote: > On 22/12/16(Thu) 00:32, Mark Kettenis wrote: > > splassert: ip_output: want 1 have 0 > > Starting stack trace... > > ip_output() at ip_output+0x7d > > ipsp_process_done() at ipsp_process_done+0x2ad > > esp_output_cb() at esp_output_cb+0x135 > > taskq_thread() at taskq_thread+0x6c > > end trace frame: 0x0, count: 253 > > End of stack trace. > > > > This makes no sense to me since esp_output_cb() calls > > ipsp_process_done() while at splsoftnet. What am I missing? > > It's a missing NET_LOCK(), right now we're abusing splassert() to find > code paths where the lock is not held and should be. This should fix it, do you confirm? Index: netinet/ip_ah.c === RCS file: /cvs/src/sys/netinet/ip_ah.c,v retrieving revision 1.123 diff -u -p -r1.123 ip_ah.c --- netinet/ip_ah.c 19 Sep 2016 18:09:22 - 1.123 +++ netinet/ip_ah.c 22 Dec 2016 13:55:02 - @@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp) return (EINVAL); } - s = splsoftnet(); + NET_LOCK(s); tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); if (tdb == NULL) { @@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp) /* Reset the session ID */ if (tdb->tdb_cryptoid != 0) tdb->tdb_cryptoid = crp->crp_sid; - splx(s); + NET_UNLOCK(s); return crypto_dispatch(crp); } free(tc, M_XDATA, 0); @@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp) crypto_freereq(crp); err = ipsp_process_done(m, tdb); - splx(s); + NET_UNLOCK(s); return err; baddone: - splx(s); + NET_UNLOCK(s); m_freem(m); Index: netinet/ip_esp.c === RCS file: /cvs/src/sys/netinet/ip_esp.c,v retrieving revision 1.141 diff -u -p -r1.141 ip_esp.c --- netinet/ip_esp.c19 Sep 2016 18:09:22 - 1.141 +++ netinet/ip_esp.c22 Dec 2016 13:55:30 - @@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp) } - s = splsoftnet(); + NET_LOCK(s); tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); if (tdb == NULL) { @@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp) /* Reset the session ID */ if (tdb->tdb_cryptoid != 0) tdb->tdb_cryptoid = crp->crp_sid; - splx(s); + NET_UNLOCK(s); return crypto_dispatch(crp); } free(tc, M_XDATA, 0); @@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp) /* Call the IPsec input callback. */ error = ipsp_process_done(m, tdb); - splx(s); + NET_UNLOCK(s); return error; baddone: - splx(s); + NET_UNLOCK(s); m_freem(m); Index: netinet/ip_ipcomp.c === RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v retrieving revision 1.48 diff -u -p -r1.48 ip_ipcomp.c --- netinet/ip_ipcomp.c 24 Sep 2016 14:51:37 - 1.48 +++ netinet/ip_ipcomp.c 22 Dec 2016 13:54:18 - @@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp) return (EINVAL); } - s = splsoftnet(); + NET_LOCK(s); tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto); if (tdb == NULL) { @@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp) /* Reset the session ID */ if (tdb->tdb_cryptoid != 0) tdb->tdb_cryptoid = crp->crp_sid; - splx(s); + NET_UNLOCK(s); return crypto_dispatch(crp); } free(tc, M_XDATA, 0); @@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp) /* Compression was useless, we have lost time. */ crypto_freereq(crp); error = ipsp_process_done(m, tdb); - splx(s); + NET_UNLOCK(s); return error; } @@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp) crypto_freereq(crp); error = ipsp_process_done(m, tdb); - splx(s); + NET_UNLOCK(s); return error; baddone: - splx(s); + NET_UNLOCK(s); m_freem(m);
Re: splassert: ip_output: want 1 have 0
On 22/12/16(Thu) 00:32, Mark Kettenis wrote: > splassert: ip_output: want 1 have 0 > Starting stack trace... > ip_output() at ip_output+0x7d > ipsp_process_done() at ipsp_process_done+0x2ad > esp_output_cb() at esp_output_cb+0x135 > taskq_thread() at taskq_thread+0x6c > end trace frame: 0x0, count: 253 > End of stack trace. > > This makes no sense to me since esp_output_cb() calls > ipsp_process_done() while at splsoftnet. What am I missing? It's a missing NET_LOCK(), right now we're abusing splassert() to find code paths where the lock is not held and should be.