> Date: Tue, 13 Sep 2016 10:30:03 +0200
> From: Martin Pieuchot <[email protected]>
> 
> On 12/09/16(Mon) 13:54, Martin Pieuchot wrote:
> > TCP timers end up calling ip_output(), which will soon require that the
> > caller holds a rwlock.
> > 
> > In order to do that, I diff below changes 'struct tcptimer' to become a
> > tuple of (timeout, task).  When a timeout fires, the corresponding task
> > gets scheduled.
> > 
> > I'm currently using the ``softnettq'' to schedule everything related to
> > the network.  This is the safest approach, because concurrent tasks are
> > still serialized.
> > 
> > I extended the G/C timeout to 1sec, because there's no reference count
> > for 'struct tcpcb'.  And the only race possible is described in the
> > comment below:
> > 
> >     /*
> >      * If one of the timer tasks is already running, it must
> >      * be spinning on the KERNEL_LOCK().  So schedule a G/C
> >      * in one second, when we known the task will have released
> >      * its reference.
> >      */
> > 
> > Since the first thing task are doing is checking for the ``TF_DEAD''
> > flag this should be enough.
> > 
> > Comments, oks?
> 
> So I heard a lot of comments about using a specific API for timed task.
> I agreed with all of them but think it's a second step.
> 
> Apart from that, do you have any other concern?  Ok?

I'm a bit concerned about the KERNEL_LOCK here.  Mixing locked tasks
and unlocked tasks in the same thread should be avoided if possible.
But if tasks need to be properly serialized, you have no cjoice.

> > Index: netinet/tcp_timer.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
> > retrieving revision 1.49
> > diff -u -p -r1.49 tcp_timer.c
> > --- netinet/tcp_timer.c     7 Mar 2016 18:44:00 -0000       1.49
> > +++ netinet/tcp_timer.c     12 Sep 2016 11:42:16 -0000
> > @@ -71,6 +71,11 @@ void     tcp_timer_persist(void *);
> >  void       tcp_timer_keep(void *);
> >  void       tcp_timer_2msl(void *);
> >  
> > +void       tcp_timer_rexmt_task(void *);
> > +void       tcp_timer_persist_task(void *);
> > +void       tcp_timer_keep_task(void *);
> > +void       tcp_timer_2msl_task(void *);
> > +
> >  const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
> >     tcp_timer_rexmt,
> >     tcp_timer_persist,
> > @@ -78,6 +83,13 @@ const tcp_timer_func_t tcp_timer_funcs[T
> >     tcp_timer_2msl,
> >  };
> >  
> > +const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS] = {
> > +   tcp_timer_rexmt_task,
> > +   tcp_timer_persist_task,
> > +   tcp_timer_keep_task,
> > +   tcp_timer_2msl_task,
> > +};
> > +
> >  /*
> >   * Timer state initialization, called from tcp_init().
> >   */
> > @@ -105,6 +117,14 @@ void
> >  tcp_delack(void *arg)
> >  {
> >     struct tcpcb *tp = arg;
> > +
> > +   task_add(softnettq, &tp->t_delack_to_task);
> > +}
> > +
> > +void
> > +tcp_delack_task(void *arg)
> > +{
> > +   struct tcpcb *tp = arg;
> >     int s;
> >  
> >     /*
> > @@ -112,15 +132,15 @@ tcp_delack(void *arg)
> >      * for whatever reason, it will restart the delayed
> >      * ACK callout.
> >      */
> > -
> > +   KERNEL_LOCK();
> >     s = splsoftnet();
> > -   if (tp->t_flags & TF_DEAD) {
> > -           splx(s);
> > -           return;
> > -   }
> > +   if (tp->t_flags & TF_DEAD)
> > +           goto out;
> >     tp->t_flags |= TF_ACKNOW;
> >     (void) tcp_output(tp);
> > + out:
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >  }
> >  
> >  /*
> > @@ -144,8 +164,7 @@ tcp_slowtimo(void)
> >   * Cancel all timers for TCP tp.
> >   */
> >  void
> > -tcp_canceltimers(tp)
> > -   struct tcpcb *tp;
> > +tcp_canceltimers(struct tcpcb *tp)
> >  {
> >     int i;
> >  
> > @@ -153,6 +172,15 @@ tcp_canceltimers(tp)
> >             TCP_TIMER_DISARM(tp, i);
> >  }
> >  
> > +void
> > +tcp_destroytimers(struct tcpcb *tp)
> > +{
> > +   int i;
> > +
> > +   for (i = 0; i < TCPT_NTIMERS; i++)
> > +           TCP_TIMER_DESTROY(tp, i);
> > +}
> > +
> >  int        tcp_backoff[TCP_MAXRXTSHIFT + 1] =
> >      { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
> >  
> > @@ -191,14 +219,21 @@ void
> >  tcp_timer_rexmt(void *arg)
> >  {
> >     struct tcpcb *tp = arg;
> > +
> > +   task_add(softnettq, &tp->t_timer[TCPT_REXMT].tcpt_task);
> > +}
> > +
> > +void
> > +tcp_timer_rexmt_task(void *arg)
> > +{
> > +   struct tcpcb *tp = arg;
> >     uint32_t rto;
> >     int s;
> >  
> > +   KERNEL_LOCK();
> >     s = splsoftnet();
> > -   if (tp->t_flags & TF_DEAD) {
> > -           splx(s);
> > -           return;
> > -   }
> > +   if (tp->t_flags & TF_DEAD)
> > +           goto out;
> >  
> >     if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
> >         SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> > @@ -225,8 +260,7 @@ tcp_timer_rexmt(void *arg)
> >             sin.sin_addr = tp->t_inpcb->inp_faddr;
> >             in_pcbnotifyall(&tcbtable, sintosa(&sin),
> >                 tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
> > -           splx(s);
> > -           return;
> > +           goto out;
> >     }
> >  
> >  #ifdef TCP_SACK
> > @@ -378,20 +412,29 @@ tcp_timer_rexmt(void *arg)
> >  
> >   out:
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >  }
> >  
> >  void
> >  tcp_timer_persist(void *arg)
> >  {
> >     struct tcpcb *tp = arg;
> > +
> > +   task_add(softnettq, &tp->t_timer[TCPT_PERSIST].tcpt_task);
> > +}
> > +
> > +void
> > +tcp_timer_persist_task(void *arg)
> > +{
> > +   struct tcpcb *tp = arg;
> >     uint32_t rto;
> >     int s;
> >  
> > +   KERNEL_LOCK();
> >     s = splsoftnet();
> >     if ((tp->t_flags & TF_DEAD) ||
> >              TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
> > -           splx(s);
> > -           return;
> > +           goto out;
> >     }
> >     tcpstat.tcps_persisttimeo++;
> >     /*
> > @@ -417,19 +460,27 @@ tcp_timer_persist(void *arg)
> >     tp->t_force = 0;
> >   out:
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >  }
> >  
> >  void
> >  tcp_timer_keep(void *arg)
> >  {
> >     struct tcpcb *tp = arg;
> > +
> > +   task_add(softnettq, &tp->t_timer[TCPT_KEEP].tcpt_task);
> > +}
> > +
> > +void
> > +tcp_timer_keep_task(void *arg)
> > +{
> > +   struct tcpcb *tp = arg;
> >     int s;
> >  
> > +   KERNEL_LOCK();
> >     s = splsoftnet();
> > -   if (tp->t_flags & TF_DEAD) {
> > -           splx(s);
> > -           return;
> > -   }
> > +   if (tp->t_flags & TF_DEAD)
> > +           goto out;
> >  
> >     tcpstat.tcps_keeptimeo++;
> >     if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> > @@ -458,8 +509,9 @@ tcp_timer_keep(void *arg)
> >             TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
> >     } else
> >             TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> > -
> > + out:
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >     return;
> >  
> >   dropit:
> > @@ -467,19 +519,27 @@ tcp_timer_keep(void *arg)
> >     tp = tcp_drop(tp, ETIMEDOUT);
> >  
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >  }
> >  
> >  void
> >  tcp_timer_2msl(void *arg)
> >  {
> >     struct tcpcb *tp = arg;
> > +
> > +   task_add(softnettq, &tp->t_timer[TCPT_2MSL].tcpt_task);
> > +}
> > +
> > +void
> > +tcp_timer_2msl_task(void *arg)
> > +{
> > +   struct tcpcb *tp = arg;
> >     int s;
> >  
> > +   KERNEL_LOCK();
> >     s = splsoftnet();
> > -   if (tp->t_flags & TF_DEAD) {
> > -           splx(s);
> > -           return;
> > -   }
> > +   if (tp->t_flags & TF_DEAD)
> > +           goto out;
> >  
> >  #ifdef TCP_SACK
> >     tcp_timer_freesack(tp);
> > @@ -491,5 +551,7 @@ tcp_timer_2msl(void *arg)
> >     else
> >             tp = tcp_close(tp);
> >  
> > + out:
> >     splx(s);
> > +   KERNEL_UNLOCK();
> >  }
> > Index: netinet/tcp_subr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
> > retrieving revision 1.154
> > diff -u -p -r1.154 tcp_subr.c
> > --- netinet/tcp_subr.c      6 Sep 2016 00:04:15 -0000       1.154
> > +++ netinet/tcp_subr.c      12 Sep 2016 11:41:30 -0000
> > @@ -526,6 +526,7 @@ tcp_close(struct tcpcb *tp)
> >     tcp_freeq(tp);
> >  
> >     tcp_canceltimers(tp);
> > +   tcp_destroytimers(tp);
> >     TCP_CLEAR_DELACK(tp);
> >     syn_cache_cleanup(tp);
> >  
> > @@ -542,7 +543,14 @@ tcp_close(struct tcpcb *tp)
> >             (void) m_free(tp->t_template);
> >  
> >     tp->t_flags |= TF_DEAD;
> > -   timeout_add(&tp->t_reap_to, 0);
> > +   /*
> > +    * If one of the timer tasks is already running, it must
> > +    * be spinning on the KERNEL_LOCK().  So schedule a G/C
> > +    * in one second, when we known the task will have released
> > +    * its reference.
> > +    */
> > +   KERNEL_ASSERT_LOCKED();
> > +   timeout_add_sec(&tp->t_reap_to, 1);
> >  
> >     inp->inp_ppcb = 0;
> >     soisdisconnected(so);
> > Index: netinet/tcp_timer.h
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
> > retrieving revision 1.13
> > diff -u -p -r1.13 tcp_timer.h
> > --- netinet/tcp_timer.h     6 Jul 2011 23:44:20 -0000       1.13
> > +++ netinet/tcp_timer.h     12 Sep 2016 11:43:35 -0000
> > @@ -115,18 +115,25 @@ const char *tcptimers[] =
> >  /*
> >   * Init, arm, disarm, and test TCP timers.
> >   */
> > -#define    TCP_TIMER_INIT(tp, timer)                                       
> > \
> > -   timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
> > +#define    TCP_TIMER_INIT(tp, timer) do {                                  
> > \
> > +   timeout_set(&(tp)->t_timer[(timer)].tcpt_timeout,               \
> > +       tcp_timer_funcs[(timer)], tp);                              \
> > +   task_set(&(tp)->t_timer[(timer)].tcpt_task,                     \
> > +       tcp_timer_tasks[(timer)], tp);                              \
> > +} while(0)
> >  
> >  #define    TCP_TIMER_ARM(tp, timer, nticks)                                
> > \
> > -   timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ))
> > +   timeout_add(&(tp)->t_timer[(timer)].tcpt_timeout,               \
> > +       (nticks) * (hz / PR_SLOWHZ))
> >  
> >  #define    TCP_TIMER_DISARM(tp, timer)                                     
> > \
> > -   timeout_del(&(tp)->t_timer[(timer)])
> > +   timeout_del(&(tp)->t_timer[(timer)].tcpt_timeout)
> >  
> >  #define    TCP_TIMER_ISARMED(tp, timer)                                    
> > \
> > -   timeout_pending(&(tp)->t_timer[(timer)])
> > +   timeout_pending(&(tp)->t_timer[(timer)].tcpt_timeout)
> >  
> > +#define    TCP_TIMER_DESTROY(tp, timer)                                    
> > \
> > +   task_del(softnettq, &(tp)->t_timer[(timer)].tcpt_task)
> >  /*
> >   * Force a time value to be in a certain range.
> >   */
> > @@ -143,6 +150,7 @@ do {                                                    
> >                 \
> >  typedef void (*tcp_timer_func_t)(void *);
> >  
> >  extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
> > +extern const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS];
> >  
> >  extern int tcptv_keep_init;
> >  extern int tcp_always_keepalive;   /* assume SO_KEEPALIVE is always set */
> > Index: netinet/tcp_var.h
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/tcp_var.h,v
> > retrieving revision 1.115
> > diff -u -p -r1.115 tcp_var.h
> > --- netinet/tcp_var.h       20 Jul 2016 19:57:53 -0000      1.115
> > +++ netinet/tcp_var.h       12 Sep 2016 11:42:38 -0000
> > @@ -64,12 +64,17 @@ struct tcpqent {
> >     struct mbuf     *tcpqe_m;       /* mbuf contains packet */
> >  };
> >  
> > +struct tcptimer {
> > +   struct timeout  tcpt_timeout;
> > +   struct task     tcpt_task;
> > +};
> > +
> >  /*
> >   * Tcp control block, one per tcp; fields:
> >   */
> >  struct tcpcb {
> >     struct tcpqehead t_segq;                /* sequencing queue */
> > -   struct timeout t_timer[TCPT_NTIMERS];   /* tcp timers */
> > +   struct tcptimer t_timer[TCPT_NTIMERS];  /* tcp timers */
> >     short   t_state;                /* state of this connection */
> >     short   t_rxtshift;             /* log(2) of rexmt exp. backoff */
> >     short   t_rxtcur;               /* current retransmit value */
> > @@ -104,6 +109,7 @@ struct tcpcb {
> >     struct  mbuf *t_template;       /* skeletal packet for transmit */
> >     struct  inpcb *t_inpcb;         /* back pointer to internet pcb */
> >     struct  timeout t_delack_to;    /* delayed ACK callback */
> > +   struct  task t_delack_to_task;  /* task for the ACK callback */
> >  /*
> >   * The following fields are used as in the protocol specification.
> >   * See RFC793, Dec. 1981, page 21.
> > @@ -215,9 +221,12 @@ struct tcpcb {
> >  #ifdef _KERNEL
> >  extern int tcp_delack_ticks;
> >  void       tcp_delack(void *);
> > +void       tcp_delack_task(void *);
> >  
> > -#define TCP_INIT_DELACK(tp)                                                
> > \
> > -   timeout_set(&(tp)->t_delack_to, tcp_delack, tp)
> > +#define TCP_INIT_DELACK(tp) do {                                   \
> > +   timeout_set(&(tp)->t_delack_to, tcp_delack, tp);                \
> > +   task_set(&(tp)->t_delack_to_task, tcp_delack_task, tp);         \
> > +} while (/* CONSTCOND */ 0)
> >  
> >  #define TCP_RESTART_DELACK(tp)                                             
> > \
> >     timeout_add(&(tp)->t_delack_to, tcp_delack_ticks)
> > @@ -594,6 +603,7 @@ extern  int tcp_syn_cache_active; /* acti
> >  
> >  int         tcp_attach(struct socket *);
> >  void        tcp_canceltimers(struct tcpcb *);
> > +void        tcp_destroytimers(struct tcpcb *);
> >  struct tcpcb *
> >      tcp_close(struct tcpcb *);
> >  void        tcp_reaper(void *);
> > Index: netinet/tcp_debug.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/tcp_debug.c,v
> > retrieving revision 1.23
> > diff -u -p -r1.23 tcp_debug.c
> > --- netinet/tcp_debug.c     14 Mar 2015 03:38:52 -0000      1.23
> > +++ netinet/tcp_debug.c     12 Sep 2016 09:58:40 -0000
> > @@ -81,6 +81,7 @@
> >  #include <sys/mbuf.h>
> >  #include <sys/socket.h>
> >  #include <sys/protosw.h>
> > +#include <sys/task.h>
> >  
> >  #include <net/route.h>
> >  
> > Index: sys/systm.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/systm.h,v
> > retrieving revision 1.116
> > diff -u -p -r1.116 systm.h
> > --- sys/systm.h     4 Sep 2016 09:22:29 -0000       1.116
> > +++ sys/systm.h     12 Sep 2016 10:01:13 -0000
> > @@ -287,6 +289,8 @@ struct uio;
> >  int        uiomove(void *, size_t, struct uio *);
> >  
> >  #if defined(_KERNEL)
> > +extern struct taskq *softnettq;            /* Network task queue. */
> > +
> >  __returns_twice int        setjmp(label_t *);
> >  __dead void        longjmp(label_t *);
> >  #endif
> > 
> 
> 

Reply via email to