On Mon, Aug 15, 2022 at 04:20:57PM +0300, Vitaliy Makkoveev wrote:
> tcp_close(struct tcpcb *tp)
> {
> /* ... */
> inp->inp_ppcb = NULL;
> soisdisconnected(so);
> in_pcbdetach(inp);
> return (NULL);
> }
>
> soisdisconnected(struct socket *so)
> {
> soassertlocked(so);
> so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
> so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
> wakeup(&so->so_timeo);
> sowwakeup(so);
> sorwakeup(so);
> }
>
> So according the current code, we can't have the `inp' with NULL `tp'
> after tcp_close() call. Looks like we had missing netlock somewhere.
When I introduced the check, notlock was not invented. We were
running on a single CPU due to kernel lock.
Back then sorwakeup() was calling somove().
So tcp_close() sets inp_ppcb = NULL, then though soisdisconnected(),
sorwakeup(),somove() PRU_RCVD was reached, finally in_pcbdetach()
is called too late.
This cannot happen anymore as somove() for TCP is running in a different
thread.
There is another possibility. soisdisconnected(),sowwakeup() call
somove() for the other direction. Then PRU_SEND and PRU_SENDOOB
may be reached. But that also cannot happen, sowwakeup() just
starts a task in the sosplice thread now.
> What about to return EINVAL in the case, when `tp' is missing? In the
> somove() we don't check the PRU_RCVD returned value, so this path
> doesn't affected. In the other cases we stops to lie to the userland.
This is the most conservative approach. Although I think it should
not happen with current implementation, we don't risk a panic. Due
to the send case I would prefer to return the error from the socket.
Never loose the error of the socket.
if ((inp = sotoinpcb(so)) == NULL ||
(tp = intotcpcb(inp)) == NULL) {
if (so->so_error)
return so->so_error;
return EINVAL;
}
With that OK bluhm@
> The `inp' without `tp' should not exist, right? This check exists only
> to silently prevent panics.
In general (inp != NULL && tp == NULL) does not exist, execpt for
special socket splicing corner cases. There is no MP bug or
use-after-free.
> Index: sys/netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.187
> diff -u -p -r1.187 tcp_usrreq.c
> --- sys/netinet/tcp_usrreq.c 15 Aug 2022 09:11:39 -0000 1.187
> +++ sys/netinet/tcp_usrreq.c 15 Aug 2022 13:16:39 -0000
> @@ -142,6 +142,36 @@ struct inpcbtable tcbtable;
> int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
> int tcp_ident(void *, size_t *, void *, size_t, int);
>
> +static inline int tcp_sogetpcb(struct socket *, struct inpcb **,
> + struct tcpcb **);
> +
> +static inline int
> +tcp_sogetpcb(struct socket *so, struct inpcb **rinp, struct tcpcb **rtp)
> +{
> + struct inpcb *inp;
> + struct tcpcb *tp;
> +
> + /*
> + * When a TCP is attached to a socket, then there will be
> + * a (struct inpcb) pointed at by the socket, and this
> + * structure will point at a subsidiary (struct tcpcb).
> + */
> + if ((inp = sotoinpcb(so)) == NULL) {
> + if (so->so_error)
> + return so->so_error;
> + return EINVAL;
> + }
> +
> + /* tp might get 0 when using socket splicing */
> + if ((tp = intotcpcb(inp)) == NULL)
> + return EINVAL;
> +
> + *rinp = inp;
> + *rtp = tp;
> +
> + return 0;
> +}
> +
> /*
> * Process a TCP user request for TCP tb. If this is a send request
> * then m is the mbuf chain of send data. If this is a timer expiration
> @@ -153,7 +183,7 @@ tcp_usrreq(struct socket *so, int req, s
> struct mbuf *control, struct proc *p)
> {
> struct inpcb *inp;
> - struct tcpcb *otp = NULL, *tp = NULL;
> + struct tcpcb *otp = NULL, *tp;
> int error = 0;
> short ostate;
>
> @@ -175,22 +205,9 @@ tcp_usrreq(struct socket *so, int req, s
> goto release;
> }
>
> - inp = sotoinpcb(so);
> - /*
> - * When a TCP is attached to a socket, then there will be
> - * a (struct inpcb) pointed at by the socket, and this
> - * structure will point at a subsidiary (struct tcpcb).
> - */
> - if (inp == NULL) {
> - error = so->so_error;
> - if (error == 0)
> - error = EINVAL;
> - goto release;
> - }
> - tp = intotcpcb(inp);
> - /* tp might get 0 when using socket splicing */
> - if (tp == NULL)
> + if ((error = tcp_sogetpcb(so, &inp, &tp)))
> goto release;
> +
> if (so->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;
> @@ -739,28 +756,15 @@ int
> tcp_detach(struct socket *so)
> {
> struct inpcb *inp;
> - struct tcpcb *otp = NULL, *tp = NULL;
> + struct tcpcb *otp = NULL, *tp;
> int error = 0;
> short ostate;
>
> soassertlocked(so);
>
> - inp = sotoinpcb(so);
> - /*
> - * When a TCP is attached to a socket, then there will be
> - * a (struct inpcb) pointed at by the socket, and this
> - * structure will point at a subsidiary (struct tcpcb).
> - */
> - if (inp == NULL) {
> - error = so->so_error;
> - if (error == 0)
> - error = EINVAL;
> + if ((error = tcp_sogetpcb(so, &inp, &tp)))
> return (error);
> - }
> - tp = intotcpcb(inp);
> - /* tp might get 0 when using socket splicing */
> - if (tp == NULL)
> - return (0);
> +
> if (so->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;