On Mon, Aug 15, 2022 at 04:20:57PM +0300, Vitaliy Makkoveev wrote:
> tcp_close(struct tcpcb *tp)
> {
>         /* ... */
>         inp->inp_ppcb = NULL;
>         soisdisconnected(so);
>         in_pcbdetach(inp);
>         return (NULL);
> }
> 
> soisdisconnected(struct socket *so)
> {
>         soassertlocked(so);
>         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
>         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
>         wakeup(&so->so_timeo);
>         sowwakeup(so);
>         sorwakeup(so);
> }
> 
> So according the current code, we can't have the `inp' with NULL `tp'
> after tcp_close() call. Looks like we had missing netlock somewhere.

When I introduced the check, notlock was not invented.  We were
running on a single CPU due to kernel lock.

Back then sorwakeup() was calling somove().

So tcp_close() sets inp_ppcb = NULL, then though soisdisconnected(),
sorwakeup(),somove() PRU_RCVD was reached, finally in_pcbdetach()
is called too late.

This cannot happen anymore as somove() for TCP is running in a different
thread.

There is another possibility.  soisdisconnected(),sowwakeup() call
somove() for the other direction.  Then PRU_SEND and PRU_SENDOOB
may be reached.  But that also cannot happen, sowwakeup() just
starts a task in the sosplice thread now.

> What about to return EINVAL in the case, when `tp' is missing? In the
> somove() we don't check the PRU_RCVD returned value, so this path
> doesn't affected. In the other cases we stops to lie to the userland.

This is the most conservative approach.  Although I think it should
not happen with current implementation, we don't risk a panic.  Due
to the send case I would prefer to return the error from the socket.
Never loose the error of the socket.

        if ((inp = sotoinpcb(so)) == NULL || 
            (tp = intotcpcb(inp)) == NULL) {
                if (so->so_error)
                        return so->so_error;
                return EINVAL;
        }

With that OK bluhm@

> The `inp' without `tp' should not exist, right? This check exists only
> to silently prevent panics.

In general (inp != NULL && tp == NULL) does not exist, execpt for
special socket splicing corner cases.  There is no MP bug or
use-after-free.

> Index: sys/netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.187
> diff -u -p -r1.187 tcp_usrreq.c
> --- sys/netinet/tcp_usrreq.c  15 Aug 2022 09:11:39 -0000      1.187
> +++ sys/netinet/tcp_usrreq.c  15 Aug 2022 13:16:39 -0000
> @@ -142,6 +142,36 @@ struct   inpcbtable tcbtable;
>  int  tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
>  int  tcp_ident(void *, size_t *, void *, size_t, int);
>  
> +static inline int tcp_sogetpcb(struct socket *, struct inpcb **,
> +                      struct tcpcb **);
> +
> +static inline int
> +tcp_sogetpcb(struct socket *so, struct inpcb **rinp, struct tcpcb **rtp)
> +{
> +     struct inpcb *inp;
> +     struct tcpcb *tp;
> +
> +     /*
> +      * When a TCP is attached to a socket, then there will be
> +      * a (struct inpcb) pointed at by the socket, and this
> +      * structure will point at a subsidiary (struct tcpcb).
> +      */
> +     if ((inp = sotoinpcb(so)) == NULL) {
> +             if (so->so_error)
> +                     return so->so_error;
> +             return EINVAL;
> +     }
> +
> +     /* tp might get 0 when using socket splicing */
> +     if ((tp = intotcpcb(inp)) == NULL)
> +             return EINVAL;
> +
> +     *rinp = inp;
> +     *rtp = tp;
> +
> +     return 0;
> +}
> +
>  /*
>   * Process a TCP user request for TCP tb.  If this is a send request
>   * then m is the mbuf chain of send data.  If this is a timer expiration
> @@ -153,7 +183,7 @@ tcp_usrreq(struct socket *so, int req, s
>      struct mbuf *control, struct proc *p)
>  {
>       struct inpcb *inp;
> -     struct tcpcb *otp = NULL, *tp = NULL;
> +     struct tcpcb *otp = NULL, *tp;
>       int error = 0;
>       short ostate;
>  
> @@ -175,22 +205,9 @@ tcp_usrreq(struct socket *so, int req, s
>               goto release;
>       }
>  
> -     inp = sotoinpcb(so);
> -     /*
> -      * When a TCP is attached to a socket, then there will be
> -      * a (struct inpcb) pointed at by the socket, and this
> -      * structure will point at a subsidiary (struct tcpcb).
> -      */
> -     if (inp == NULL) {
> -             error = so->so_error;
> -             if (error == 0)
> -                     error = EINVAL;
> -             goto release;
> -     }
> -     tp = intotcpcb(inp);
> -     /* tp might get 0 when using socket splicing */
> -     if (tp == NULL)
> +     if ((error = tcp_sogetpcb(so, &inp, &tp)))
>               goto release;
> +
>       if (so->so_options & SO_DEBUG) {
>               otp = tp;
>               ostate = tp->t_state;
> @@ -739,28 +756,15 @@ int
>  tcp_detach(struct socket *so)
>  {
>       struct inpcb *inp;
> -     struct tcpcb *otp = NULL, *tp = NULL;
> +     struct tcpcb *otp = NULL, *tp;
>       int error = 0;
>       short ostate;
>  
>       soassertlocked(so);
>  
> -     inp = sotoinpcb(so);
> -     /*
> -      * When a TCP is attached to a socket, then there will be
> -      * a (struct inpcb) pointed at by the socket, and this
> -      * structure will point at a subsidiary (struct tcpcb).
> -      */
> -     if (inp == NULL) {
> -             error = so->so_error;
> -             if (error == 0)
> -                     error = EINVAL;
> +     if ((error = tcp_sogetpcb(so, &inp, &tp)))
>               return (error);
> -     }
> -     tp = intotcpcb(inp);
> -     /* tp might get 0 when using socket splicing */
> -     if (tp == NULL)
> -             return (0);
> +
>       if (so->so_options & SO_DEBUG) {
>               otp = tp;
>               ostate = tp->t_state;

Reply via email to