On Mon, Aug 15, 2022 at 04:28:28PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Aug 15, 2022 at 04:20:57PM +0300, Vitaliy Makkoveev wrote:
> > On Mon, Aug 15, 2022 at 02:36:34PM +0200, Alexander Bluhm wrote:
> > > On Mon, Aug 15, 2022 at 02:29:02PM +0300, Vitaliy Makkoveev wrote:
> > > > Nothing special, just get assign `inp' and `tp' from passed socket. I
> > > > want to do this before (*pru_usrreq)() split to avoid huge code
> > > > duplication.
> > > > 
> > > > We have "tp might get 0 when using socket splicing" commentary. I
> > > > checked where we set `inp_ppcb' to NULL and found, tcp_close() is the
> > > > only place, and the following in_pcbdetach() sets `so_pcb' to NULL. So
> > > > this commentary looks wrong, because we can't have TCP sockets without
> > > > TCP pcb attached.
> > > 
> > > I added the comment and check after a panic in production.
> > > 
> > > ----------------------------
> > > revision 1.109
> > > date: 2012/01/03 21:50:12;  author: bluhm;  state: Exp;  lines: +6 -2;
> > > When used with socket splicing, tcp_usrreq() might get called with
> > > a socket that has an inp but tp is NULL.  The call stack for that
> > > is tcp_input() tcp_close() soisdisconnected() sorwakeup() somove()
> > > tcp_usrreq(PRU_RCVD).  To avoid a NULL dereference, just return in
> > > that case.
> > > ok henning@
> > > ----------------------------
> > > 
> > > tcp_input() calls tcp_close() after a reset packet.
> > 
> 
> May be I misunderstood something, but the socket should be destroyed
> after tcp_close() call:
> 
> void
> in_pcbdetach(struct inpcb *inp)
> {
>         struct socket *so = inp->inp_socket;
>         struct inpcbtable *table = inp->inp_table;
> 
>         NET_ASSERT_LOCKED();
> 
>         so->so_pcb = NULL;
>         /*
>          * As long as the NET_LOCK() is the default lock for Internet
>          * sockets, do not release it to not introduce new sleeping
>          * points.
>          */
>         sofree(so, 1);
>       /* ... */
> }
> 
> So it looks like the use-after-free in the somove() thread.
> 

This diff takes a references on sliced sockets and keeps them until
sounslice() called. I don't propose to commit it as is, may be sorele()
should be reworked to call sofree() itself.


Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.283
diff -u -p -r1.283 uipc_socket.c
--- sys/kern/uipc_socket.c      15 Aug 2022 09:11:38 -0000      1.283
+++ sys/kern/uipc_socket.c      15 Aug 2022 14:02:37 -0000
@@ -295,15 +295,6 @@ sofree(struct socket *so, int keep_lock)
                        sounlock(head);
        }
 
-       if (persocket) {
-               sounlock(so);
-               refcnt_finalize(&so->so_refcnt, "sofinal");
-               solock(so);
-       }
-
-       sigio_free(&so->so_sigio);
-       klist_free(&so->so_rcv.sb_sel.si_note);
-       klist_free(&so->so_snd.sb_sel.si_note);
 #ifdef SOCKET_SPLICE
        if (so->so_sp) {
                if (issplicedback(so)) {
@@ -322,6 +313,14 @@ sofree(struct socket *so, int keep_lock)
                }
        }
 #endif /* SOCKET_SPLICE */
+
+       sounlock(so);
+       refcnt_finalize(&so->so_refcnt, "sofinal");
+       solock(so);
+
+       sigio_free(&so->so_sigio);
+       klist_free(&so->so_rcv.sb_sel.si_note);
+       klist_free(&so->so_snd.sb_sel.si_note);
        sbrelease(so, &so->so_snd);
        sorflush(so);
        if (!keep_lock)
@@ -1329,6 +1328,8 @@ sosplice(struct socket *so, int fd, off_
        }
 
        /* Splice so and sosp together. */
+       soref(so);
+       soref(sosp);
        so->so_sp->ssp_socket = sosp;
        sosp->so_sp->ssp_soback = so;
        so->so_splicelen = 0;
@@ -1368,18 +1369,23 @@ sosplice(struct socket *so, int fd, off_
 void
 sounsplice(struct socket *so, struct socket *sosp, int freeing)
 {
+       struct socket *ssp_socket, *ssp_soback;
        soassertlocked(so);
 
        task_del(sosplice_taskq, &so->so_splicetask);
        timeout_del(&so->so_idleto);
        sosp->so_snd.sb_flags &= ~SB_SPLICE;
        so->so_rcv.sb_flags &= ~SB_SPLICE;
+       ssp_socket = so->so_sp->ssp_socket;
+       ssp_soback = sosp->so_sp->ssp_soback;
        so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
        /* Do not wakeup a socket that is about to be freed. */
        if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
                sorwakeup(so);
        if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
                sowwakeup(sosp);
+       sorele(ssp_socket);
+       sorele(ssp_soback);
 }
 
 void
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.270
diff -u -p -r1.270 in_pcb.c
--- sys/netinet/in_pcb.c        8 Aug 2022 12:06:30 -0000       1.270
+++ sys/netinet/in_pcb.c        15 Aug 2022 14:02:37 -0000
@@ -582,7 +582,16 @@ in_pcbdetach(struct inpcb *inp)
 
        NET_ASSERT_LOCKED();
 
+       mtx_enter(&table->inpt_mtx);
+       LIST_REMOVE(inp, inp_lhash);
+       LIST_REMOVE(inp, inp_hash);
+       TAILQ_REMOVE(&table->inpt_queue, inp, inp_queue);
+       table->inpt_count--;
+       mtx_leave(&table->inpt_mtx);
+
        so->so_pcb = NULL;
+       inp->inp_socket = NULL;
+
        /*
         * As long as the NET_LOCK() is the default lock for Internet
         * sockets, do not release it to not introduce new sleeping
@@ -608,12 +617,6 @@ in_pcbdetach(struct inpcb *inp)
                pf_inp_unlink(inp);
        }
 #endif
-       mtx_enter(&table->inpt_mtx);
-       LIST_REMOVE(inp, inp_lhash);
-       LIST_REMOVE(inp, inp_hash);
-       TAILQ_REMOVE(&table->inpt_queue, inp, inp_queue);
-       table->inpt_count--;
-       mtx_leave(&table->inpt_mtx);
 
        in_pcbunref(inp);
 }

Reply via email to