Re: splassert: ip_output: want 1 have 0

2016-12-23 Thread Mark Kettenis
> Date: Thu, 22 Dec 2016 14:56:43 +0100
> From: Martin Pieuchot 
> 
> On 22/12/16(Thu) 10:45, Martin Pieuchot wrote:
> > On 22/12/16(Thu) 00:32, Mark Kettenis wrote:
> > > splassert: ip_output: want 1 have 0
> > > Starting stack trace...
> > > ip_output() at ip_output+0x7d
> > > ipsp_process_done() at ipsp_process_done+0x2ad
> > > esp_output_cb() at esp_output_cb+0x135
> > > taskq_thread() at taskq_thread+0x6c
> > > end trace frame: 0x0, count: 253
> > > End of stack trace.
> > > 
> > > This makes no sense to me since esp_output_cb() calls
> > > ipsp_process_done() while at splsoftnet.  What am I missing?
> > 
> > It's a missing NET_LOCK(), right now we're abusing splassert() to find
> > code paths where the lock is not held and should be.
> 
> This should fix it, do you confirm?

Looked closer at the diff now, and it looks correct to me.

> Index: netinet/ip_ah.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 ip_ah.c
> --- netinet/ip_ah.c   19 Sep 2016 18:09:22 -  1.123
> +++ netinet/ip_ah.c   22 Dec 2016 13:55:02 -
> @@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp)
>   return (EINVAL);
>   }
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp)
>   crypto_freereq(crp);
>  
>   err =  ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return err;
>  
>   baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  
> Index: netinet/ip_esp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.141
> diff -u -p -r1.141 ip_esp.c
> --- netinet/ip_esp.c  19 Sep 2016 18:09:22 -  1.141
> +++ netinet/ip_esp.c  22 Dec 2016 13:55:30 -
> @@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp)
>   }
>  
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp)
>  
>   /* Call the IPsec input callback. */
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>  
>   baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  
> Index: netinet/ip_ipcomp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v
> retrieving revision 1.48
> diff -u -p -r1.48 ip_ipcomp.c
> --- netinet/ip_ipcomp.c   24 Sep 2016 14:51:37 -  1.48
> +++ netinet/ip_ipcomp.c   22 Dec 2016 13:54:18 -
> @@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   return (EINVAL);
>   }
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   /* Compression was useless, we have lost time. */
>   crypto_freereq(crp);
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>   }
>  
> @@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp)
>   crypto_freereq(crp);
>  
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>  
>  baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  
> 



Re: splassert: ip_output: want 1 have 0

2016-12-22 Thread Mark Kettenis
> Date: Thu, 22 Dec 2016 14:56:43 +0100
> From: Martin Pieuchot 
> 
> On 22/12/16(Thu) 10:45, Martin Pieuchot wrote:
> > On 22/12/16(Thu) 00:32, Mark Kettenis wrote:
> > > splassert: ip_output: want 1 have 0
> > > Starting stack trace...
> > > ip_output() at ip_output+0x7d
> > > ipsp_process_done() at ipsp_process_done+0x2ad
> > > esp_output_cb() at esp_output_cb+0x135
> > > taskq_thread() at taskq_thread+0x6c
> > > end trace frame: 0x0, count: 253
> > > End of stack trace.
> > > 
> > > This makes no sense to me since esp_output_cb() calls
> > > ipsp_process_done() while at splsoftnet.  What am I missing?
> > 
> > It's a missing NET_LOCK(), right now we're abusing splassert() to find
> > code paths where the lock is not held and should be.
> 
> This should fix it, do you confirm?

Yes, this fixes the problem.  Will run with it for a bit and take a
closer look tomorrow.

> Index: netinet/ip_ah.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 ip_ah.c
> --- netinet/ip_ah.c   19 Sep 2016 18:09:22 -  1.123
> +++ netinet/ip_ah.c   22 Dec 2016 13:55:02 -
> @@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp)
>   return (EINVAL);
>   }
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp)
>   crypto_freereq(crp);
>  
>   err =  ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return err;
>  
>   baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  
> Index: netinet/ip_esp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.141
> diff -u -p -r1.141 ip_esp.c
> --- netinet/ip_esp.c  19 Sep 2016 18:09:22 -  1.141
> +++ netinet/ip_esp.c  22 Dec 2016 13:55:30 -
> @@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp)
>   }
>  
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp)
>  
>   /* Call the IPsec input callback. */
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>  
>   baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  
> Index: netinet/ip_ipcomp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v
> retrieving revision 1.48
> diff -u -p -r1.48 ip_ipcomp.c
> --- netinet/ip_ipcomp.c   24 Sep 2016 14:51:37 -  1.48
> +++ netinet/ip_ipcomp.c   22 Dec 2016 13:54:18 -
> @@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   return (EINVAL);
>   }
>  
> - s = splsoftnet();
> + NET_LOCK(s);
>  
>   tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
>   if (tdb == NULL) {
> @@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   /* Reset the session ID */
>   if (tdb->tdb_cryptoid != 0)
>   tdb->tdb_cryptoid = crp->crp_sid;
> - splx(s);
> + NET_UNLOCK(s);
>   return crypto_dispatch(crp);
>   }
>   free(tc, M_XDATA, 0);
> @@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp)
>   /* Compression was useless, we have lost time. */
>   crypto_freereq(crp);
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>   }
>  
> @@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp)
>   crypto_freereq(crp);
>  
>   error = ipsp_process_done(m, tdb);
> - splx(s);
> + NET_UNLOCK(s);
>   return error;
>  
>  baddone:
> - splx(s);
> + NET_UNLOCK(s);
>  
>   m_freem(m);
>  

Re: splassert: ip_output: want 1 have 0

2016-12-22 Thread Martin Pieuchot
On 22/12/16(Thu) 10:45, Martin Pieuchot wrote:
> On 22/12/16(Thu) 00:32, Mark Kettenis wrote:
> > splassert: ip_output: want 1 have 0
> > Starting stack trace...
> > ip_output() at ip_output+0x7d
> > ipsp_process_done() at ipsp_process_done+0x2ad
> > esp_output_cb() at esp_output_cb+0x135
> > taskq_thread() at taskq_thread+0x6c
> > end trace frame: 0x0, count: 253
> > End of stack trace.
> > 
> > This makes no sense to me since esp_output_cb() calls
> > ipsp_process_done() while at splsoftnet.  What am I missing?
> 
> It's a missing NET_LOCK(), right now we're abusing splassert() to find
> code paths where the lock is not held and should be.

This should fix it, do you confirm?

Index: netinet/ip_ah.c
===
RCS file: /cvs/src/sys/netinet/ip_ah.c,v
retrieving revision 1.123
diff -u -p -r1.123 ip_ah.c
--- netinet/ip_ah.c 19 Sep 2016 18:09:22 -  1.123
+++ netinet/ip_ah.c 22 Dec 2016 13:55:02 -
@@ -1219,7 +1219,7 @@ ah_output_cb(struct cryptop *crp)
return (EINVAL);
}
 
-   s = splsoftnet();
+   NET_LOCK(s);
 
tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
if (tdb == NULL) {
@@ -1236,7 +1236,7 @@ ah_output_cb(struct cryptop *crp)
/* Reset the session ID */
if (tdb->tdb_cryptoid != 0)
tdb->tdb_cryptoid = crp->crp_sid;
-   splx(s);
+   NET_UNLOCK(s);
return crypto_dispatch(crp);
}
free(tc, M_XDATA, 0);
@@ -1258,11 +1258,11 @@ ah_output_cb(struct cryptop *crp)
crypto_freereq(crp);
 
err =  ipsp_process_done(m, tdb);
-   splx(s);
+   NET_UNLOCK(s);
return err;
 
  baddone:
-   splx(s);
+   NET_UNLOCK(s);
 
m_freem(m);
 
Index: netinet/ip_esp.c
===
RCS file: /cvs/src/sys/netinet/ip_esp.c,v
retrieving revision 1.141
diff -u -p -r1.141 ip_esp.c
--- netinet/ip_esp.c19 Sep 2016 18:09:22 -  1.141
+++ netinet/ip_esp.c22 Dec 2016 13:55:30 -
@@ -1064,7 +1064,7 @@ esp_output_cb(struct cryptop *crp)
}
 
 
-   s = splsoftnet();
+   NET_LOCK(s);
 
tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
if (tdb == NULL) {
@@ -1081,7 +1081,7 @@ esp_output_cb(struct cryptop *crp)
/* Reset the session ID */
if (tdb->tdb_cryptoid != 0)
tdb->tdb_cryptoid = crp->crp_sid;
-   splx(s);
+   NET_UNLOCK(s);
return crypto_dispatch(crp);
}
free(tc, M_XDATA, 0);
@@ -1098,11 +1098,11 @@ esp_output_cb(struct cryptop *crp)
 
/* Call the IPsec input callback. */
error = ipsp_process_done(m, tdb);
-   splx(s);
+   NET_UNLOCK(s);
return error;
 
  baddone:
-   splx(s);
+   NET_UNLOCK(s);
 
m_freem(m);
 
Index: netinet/ip_ipcomp.c
===
RCS file: /cvs/src/sys/netinet/ip_ipcomp.c,v
retrieving revision 1.48
diff -u -p -r1.48 ip_ipcomp.c
--- netinet/ip_ipcomp.c 24 Sep 2016 14:51:37 -  1.48
+++ netinet/ip_ipcomp.c 22 Dec 2016 13:54:18 -
@@ -554,7 +554,7 @@ ipcomp_output_cb(struct cryptop *crp)
return (EINVAL);
}
 
-   s = splsoftnet();
+   NET_LOCK(s);
 
tdb = gettdb(tc->tc_rdomain, tc->tc_spi, >tc_dst, tc->tc_proto);
if (tdb == NULL) {
@@ -571,7 +571,7 @@ ipcomp_output_cb(struct cryptop *crp)
/* Reset the session ID */
if (tdb->tdb_cryptoid != 0)
tdb->tdb_cryptoid = crp->crp_sid;
-   splx(s);
+   NET_UNLOCK(s);
return crypto_dispatch(crp);
}
free(tc, M_XDATA, 0);
@@ -588,7 +588,7 @@ ipcomp_output_cb(struct cryptop *crp)
/* Compression was useless, we have lost time. */
crypto_freereq(crp);
error = ipsp_process_done(m, tdb);
-   splx(s);
+   NET_UNLOCK(s);
return error;
}
 
@@ -638,11 +638,11 @@ ipcomp_output_cb(struct cryptop *crp)
crypto_freereq(crp);
 
error = ipsp_process_done(m, tdb);
-   splx(s);
+   NET_UNLOCK(s);
return error;
 
 baddone:
-   splx(s);
+   NET_UNLOCK(s);
 
m_freem(m);
 



Re: splassert: ip_output: want 1 have 0

2016-12-22 Thread Martin Pieuchot
On 22/12/16(Thu) 00:32, Mark Kettenis wrote:
> splassert: ip_output: want 1 have 0
> Starting stack trace...
> ip_output() at ip_output+0x7d
> ipsp_process_done() at ipsp_process_done+0x2ad
> esp_output_cb() at esp_output_cb+0x135
> taskq_thread() at taskq_thread+0x6c
> end trace frame: 0x0, count: 253
> End of stack trace.
> 
> This makes no sense to me since esp_output_cb() calls
> ipsp_process_done() while at splsoftnet.  What am I missing?

It's a missing NET_LOCK(), right now we're abusing splassert() to find
code paths where the lock is not held and should be.